Lurunchik committed on
Commit
bf1ebb5
·
1 Parent(s): 122bc3b

add model code

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. config.json +5 -0
  3. model.py +0 -0
  4. nfqa_model.py +105 -0
.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -2,6 +2,10 @@
2
  "architectures": [
3
  "RobertaNFQAClassification"
4
  ],
 
 
 
 
5
  "attention_probs_dropout_prob": 0.1,
6
  "bos_token_id": 0,
7
  "eos_token_id": 2,
@@ -40,6 +44,7 @@
40
  "num_hidden_layers": 12,
41
  "pad_token_id": 1,
42
  "position_embedding_type": "absolute",
 
43
  "transformers_version": "4.2.2",
44
  "type_vocab_size": 1,
45
  "use_cache": true,
 
2
  "architectures": [
3
  "RobertaNFQAClassification"
4
  ],
5
+ "auto_map": {
6
+ "AutoConfig": "RobertaConfig",
7
+ "AutoModelForImageClassification": "nfqa_model.RobertaNFQAClassification"
8
+ },
9
  "attention_probs_dropout_prob": 0.1,
10
  "bos_token_id": 0,
11
  "eos_token_id": 2,
 
44
  "num_hidden_layers": 12,
45
  "pad_token_id": 1,
46
  "position_embedding_type": "absolute",
47
+ "problem_type": "single_label_classification",
48
  "transformers_version": "4.2.2",
49
  "type_vocab_size": 1,
50
  "use_cache": true,
model.py DELETED
File without changes
nfqa_model.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Sequence, Optional, Union, Tuple
2
+
3
+ import torch
4
+ from torch import nn
5
+ from torch.nn import functional, CrossEntropyLoss
6
+ from transformers import RobertaConfig
7
+ from transformers.modeling_outputs import SequenceClassifierOutput
8
+ from transformers.models.roberta.modeling_roberta import RobertaModel, RobertaPreTrainedModel, RobertaPooler
9
+
10
+
11
class MishActivation(nn.Module):
    """Element-wise Mish activation: ``x * tanh(softplus(x))``."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply Mish to ``x`` and return a tensor of the same shape."""
        smooth = torch.nn.functional.softplus(x)
        gate = torch.tanh(smooth)
        return x * gate
14
+
15
+
16
class NFQAClassificationHead(nn.Module):
    """Feed-forward classification head.

    Applies a stack of ``Linear -> MishActivation -> Dropout`` hidden layers and
    then a final linear projection to ``num_labels`` logits.

    Args:
        input_dim: Size of the last dimension of the input features.
        num_labels: Number of logits produced by the final layer.
        hidden_dims: Output width of each hidden linear layer, in order. May be
            empty, in which case the input is projected directly to the logits.
        dropout: Dropout probability applied after every hidden activation.
    """

    def __init__(
        self, input_dim: int, num_labels: int, hidden_dims: Sequence[int] = (768, 512), dropout: float = 0.0,
    ) -> None:
        super().__init__()

        # Chain the widths so each hidden layer consumes the previous layer's
        # output. Building every layer with `input_dim` inputs only works when
        # all but the last hidden width equal `input_dim` (true for the
        # defaults, so default parameter shapes are unchanged).
        layer_dims = [input_dim, *hidden_dims]
        self.linear_layers = nn.Sequential(
            *(nn.Linear(in_dim, out_dim) for in_dim, out_dim in zip(layer_dims[:-1], layer_dims[1:]))
        )
        # `layer_dims[-1]` falls back to `input_dim` when `hidden_dims` is empty.
        self.classification_layer = torch.nn.Linear(layer_dims[-1], num_labels)

        # Registered as ModuleLists (not plain Python lists) so that
        # train()/eval()/to() propagate to them; in particular dropout is
        # switched off by eval(). These modules hold no parameters, so the
        # state_dict keys are unaffected.
        self.activations = nn.ModuleList([MishActivation() for _ in hidden_dims])
        self.dropouts = nn.ModuleList([torch.nn.Dropout(p=dropout) for _ in hidden_dims])

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        """Run the hidden stack on ``inputs`` and return the classification logits."""
        output = inputs
        for layer, activation, dropout in zip(self.linear_layers, self.activations, self.dropouts):
            output = dropout(activation(layer(output)))
        return self.classification_layer(output)
36
+
37
+
38
class RobertaNFQAClassification(RobertaPreTrainedModel):
    """RoBERTa encoder with a pooler and an ``NFQAClassificationHead`` on top.

    The forward pass feeds the encoder's first output through ``self.pooler``,
    ``self.dropout`` and ``self.feedforward`` to obtain the logits.
    """

    _keys_to_ignore_on_load_missing = [r"position_ids"]
    _DROPOUT = 0.0

    def __init__(self, config: RobertaConfig):
        """Build the encoder, pooler, classification head and dropout from ``config``."""
        super().__init__(config)
        self.config = config
        self.num_labels = config.num_labels

        # Attribute names below become parameter-name prefixes in the
        # state_dict, so they (and their construction order) are kept as-is.
        # NOTE(review): the encoder is built with its own pooling layer while
        # forward() only uses the separate `self.pooler` — presumably kept for
        # checkpoint compatibility; confirm before changing.
        self.embedder = RobertaModel(config, add_pooling_layer=True)
        self.pooler = RobertaPooler(config)
        self.feedforward = NFQAClassificationHead(config.hidden_size, config.num_labels)
        self.dropout = torch.nn.Dropout(self._DROPOUT)

        self.init_weights()

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        token_type_ids: Optional[torch.LongTensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[Tuple[torch.Tensor, ...], SequenceClassifierOutput]:
        r"""
        Encode the inputs, classify the pooled representation and optionally compute a loss.

        labels (`torch.LongTensor`, *optional*):
            Target class indices. When given, a Cross-Entropy loss is computed between the logits (viewed as
            `(-1, num_labels)`) and the flattened labels; no other loss type is used by this model.

        Returns a `SequenceClassifierOutput` when `return_dict` is truthy (defaulting to
        `config.use_return_dict`), otherwise a tuple `(loss?, logits, *encoder_outputs[2:])` where the loss is
        present only if `labels` was provided.
        """
        if return_dict is None:
            return_dict = self.config.use_return_dict

        encoder_outputs = self.embedder(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        # First encoder output -> pooler -> dropout -> classification head.
        pooled = self.pooler(encoder_outputs[0])
        logits = self.feedforward(self.dropout(pooled))

        loss = None
        if labels is not None:
            criterion = CrossEntropyLoss()
            loss = criterion(logits.view(-1, self.num_labels), labels.view(-1))

        if return_dict:
            return SequenceClassifierOutput(
                loss=loss,
                logits=logits,
                hidden_states=encoder_outputs.hidden_states,
                attentions=encoder_outputs.attentions,
            )

        # Tuple output: logits followed by the encoder outputs from index 2 on,
        # with the loss prepended when it was computed.
        extras = encoder_outputs[2:]
        if loss is None:
            return (logits,) + extras
        return (loss, logits) + extras