yangwang825 committed on
Commit
d3aaaf9
·
1 Parent(s): d1b0c84

Upload BertForSequenceClassification

Browse files
Files changed (3) hide show
  1. config.json +6 -1
  2. modeling_bert.py +79 -1
  3. pytorch_model.bin +1 -1
config.json CHANGED
@@ -1,8 +1,12 @@
1
  {
2
  "affine": true,
 
 
 
3
  "attention_probs_dropout_prob": 0.1,
4
  "auto_map": {
5
- "AutoConfig": "configuration_bert.BertConfig"
 
6
  },
7
  "classifier_dropout": null,
8
  "hidden_act": "gelu",
@@ -17,6 +21,7 @@
17
  "num_hidden_layers": 12,
18
  "pad_token_id": 0,
19
  "position_embedding_type": "absolute",
 
20
  "transformers_version": "4.33.3",
21
  "type_vocab_size": 2,
22
  "use_cache": true,
 
1
  {
2
  "affine": true,
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "auto_map": {
8
+ "AutoConfig": "configuration_bert.BertConfig",
9
+ "AutoModelForSequenceClassification": "modeling_bert.BertForSequenceClassification"
10
  },
11
  "classifier_dropout": null,
12
  "hidden_act": "gelu",
 
21
  "num_hidden_layers": 12,
22
  "pad_token_id": 0,
23
  "position_embedding_type": "absolute",
24
+ "torch_dtype": "float32",
25
  "transformers_version": "4.33.3",
26
  "type_vocab_size": 2,
27
  "use_cache": true,
modeling_bert.py CHANGED
@@ -16,7 +16,8 @@ from transformers.models.bert.modeling_bert import (
16
  )
17
  from transformers.modeling_outputs import (
18
  BaseModelOutputWithPoolingAndCrossAttentions,
19
- SequenceClassifierOutput
 
20
  )
21
 
22
  from .configuration_bert import BertConfig
@@ -293,3 +294,80 @@ class BertForSequenceClassification(BertPreTrainedModel):
293
  hidden_states=outputs.hidden_states,
294
  attentions=outputs.attentions,
295
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  )
17
  from transformers.modeling_outputs import (
18
  BaseModelOutputWithPoolingAndCrossAttentions,
19
+ SequenceClassifierOutput,
20
+ MultipleChoiceModelOutput
21
  )
22
 
23
  from .configuration_bert import BertConfig
 
294
  hidden_states=outputs.hidden_states,
295
  attentions=outputs.attentions,
296
  )
297
+
298
+
299
+ class BertForMultipleChoice(BertPreTrainedModel):
300
+
301
+ def __init__(self, config):
302
+ super().__init__(config)
303
+
304
+ self.bert = BertModel(config)
305
+ classifier_dropout = (
306
+ config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob
307
+ )
308
+ self.dropout = nn.Dropout(classifier_dropout)
309
+ self.classifier = nn.Linear(config.hidden_size, 1)
310
+
311
+ # Initialize weights and apply final processing
312
+ self.post_init()
313
+
314
+ def forward(
315
+ self,
316
+ input_ids: Optional[torch.Tensor] = None,
317
+ attention_mask: Optional[torch.Tensor] = None,
318
+ token_type_ids: Optional[torch.Tensor] = None,
319
+ position_ids: Optional[torch.Tensor] = None,
320
+ head_mask: Optional[torch.Tensor] = None,
321
+ inputs_embeds: Optional[torch.Tensor] = None,
322
+ labels: Optional[torch.Tensor] = None,
323
+ output_attentions: Optional[bool] = None,
324
+ output_hidden_states: Optional[bool] = None,
325
+ return_dict: Optional[bool] = None,
326
+ ) -> Union[Tuple[torch.Tensor], MultipleChoiceModelOutput]:
327
+
328
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
329
+ num_choices = input_ids.shape[1] if input_ids is not None else inputs_embeds.shape[1]
330
+
331
+ input_ids = input_ids.view(-1, input_ids.size(-1)) if input_ids is not None else None
332
+ attention_mask = attention_mask.view(-1, attention_mask.size(-1)) if attention_mask is not None else None
333
+ token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1)) if token_type_ids is not None else None
334
+ position_ids = position_ids.view(-1, position_ids.size(-1)) if position_ids is not None else None
335
+ inputs_embeds = (
336
+ inputs_embeds.view(-1, inputs_embeds.size(-2), inputs_embeds.size(-1))
337
+ if inputs_embeds is not None
338
+ else None
339
+ )
340
+
341
+ outputs = self.bert(
342
+ input_ids,
343
+ attention_mask=attention_mask,
344
+ token_type_ids=token_type_ids,
345
+ position_ids=position_ids,
346
+ head_mask=head_mask,
347
+ inputs_embeds=inputs_embeds,
348
+ output_attentions=output_attentions,
349
+ output_hidden_states=output_hidden_states,
350
+ return_dict=return_dict,
351
+ )
352
+
353
+ pooled_output = outputs[1]
354
+
355
+ pooled_output = self.dropout(pooled_output)
356
+ logits = self.classifier(pooled_output)
357
+ reshaped_logits = logits.view(-1, num_choices)
358
+
359
+ loss = None
360
+ if labels is not None:
361
+ loss_fct = nn.CrossEntropyLoss()
362
+ loss = loss_fct(reshaped_logits, labels)
363
+
364
+ if not return_dict:
365
+ output = (reshaped_logits,) + outputs[2:]
366
+ return ((loss,) + output) if loss is not None else output
367
+
368
+ return MultipleChoiceModelOutput(
369
+ loss=loss,
370
+ logits=reshaped_logits,
371
+ hidden_states=outputs.hidden_states,
372
+ attentions=outputs.attentions,
373
+ )
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:994c6ca6ce911a444ebac151fbe611c63ba1d26c14438d690b2ea071958f419a
3
  size 438000689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0e4d45385d6072711fe037338b7d41b4c82e7310bfec45c493ee84f649432b7
3
  size 438000689