import torch.nn as nn
from transformers import AutoConfig, AutoModel, PreTrainedModel
from transformers.modeling_outputs import SequenceClassifierOutput
from huggingface_hub import PyTorchModelHubMixin


class InjecGuard(PreTrainedModel, PyTorchModelHubMixin):
    """Sequence classifier: a pretrained encoder backbone with a linear head.

    Classification uses the hidden state of the first ([CLS]) token from the
    backbone's last layer. The class is Hub-compatible via both
    ``PreTrainedModel`` and ``PyTorchModelHubMixin``.
    """

    config_class = AutoConfig

    def __init__(self, config):
        """Build the backbone and the classification head.

        Args:
            config: a ``transformers`` model config; must carry
                ``num_labels`` and a ``_name_or_path`` pointing at the
                backbone checkpoint.
        """
        super().__init__(config)
        # NOTE(review): this reads the private attribute ``_name_or_path`` and
        # downloads/loads backbone weights inside __init__; when this model is
        # itself loaded via ``from_pretrained`` those weights are fetched and
        # then overwritten by the checkpoint's state dict. Consider
        # ``AutoModel.from_config(config)`` — confirm against training code
        # before changing.
        self.deberta = AutoModel.from_pretrained(config._name_or_path)
        # Linear head mapping the backbone hidden size to the label space.
        self.classifier = nn.Linear(self.deberta.config.hidden_size, config.num_labels)

    def forward(self, input_ids, attention_mask, labels=None, **kwargs):
        """Run a forward pass and optionally compute the training loss.

        Args:
            input_ids: token id tensor of shape (batch, seq_len).
            attention_mask: attention mask of shape (batch, seq_len).
            labels: optional class-index tensor of shape (batch,); when
                given, cross-entropy loss is computed (enables HF Trainer).
            **kwargs: extra arguments are accepted and ignored so callers
                passing standard HF batch keys do not break.

        Returns:
            ``SequenceClassifierOutput`` with ``logits`` and, when
            ``labels`` is provided, ``loss``.
        """
        outputs = self.deberta(
            input_ids=input_ids, attention_mask=attention_mask
        )["last_hidden_state"]
        # Pool with the first ([CLS]) token of the final hidden states.
        pooled_output = outputs[:, 0, :]
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            # Standard single-label classification loss over flattened logits.
            loss_fct = nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.config.num_labels), labels.view(-1))

        return SequenceClassifierOutput(loss=loss, logits=logits)