PeteBleackley committed b2593fa (1 parent: 7cc6121)

Making sure RoBERTa layers have all required arguments

qarac/corpora/CombinedCorpus.py CHANGED
@@ -185,7 +185,11 @@ class CombinedCorpus(keras.utils.Sequence):
         maxlen = max((len(sample) for sample in batch))
         for sample in batch:
             sample.pad(maxlen,pad_id=self.pad_token)
-        return tensorflow.constant([sample.ids
-                                    for sample in batch])
+        input_ids = tensorflow.constant([sample.ids
+                                         for sample in batch])
+        attention_mask = tensorflow.constant(input_ids.numpy().apply(lambda x: 0.0 if x==self.pad_token
+                                                                     else 1.0))
+        return {'input_ids':input_ids,
+                'attention_mask':attention_mask}


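A note on the new attention-mask line: numpy.ndarray has no apply method (that is a pandas idiom), so input_ids.numpy().apply(...) would raise an AttributeError when a batch is built. A minimal sketch of an equivalent, fully vectorised mask using only TensorFlow ops, assuming pad_token is the integer pad id passed to sample.pad above:

import tensorflow

def build_attention_mask(input_ids, pad_token):
    # 1.0 where the token is real, 0.0 where it is padding
    return tensorflow.cast(input_ids != pad_token, tensorflow.float32)

# Inside the batch-building method this would stand in for the .apply() call, roughly:
#     attention_mask = build_attention_mask(input_ids, self.pad_token)
#     return {'input_ids': input_ids, 'attention_mask': attention_mask}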
qarac/models/QaracDecoderModel.py CHANGED
@@ -49,7 +49,13 @@ class QaracDecoderHead(keras.layers.Layer):
         """
         self.built = True

-    def call(self,inputs):
+
+
+
+    def call(self,
+             vector,
+             hidden_states,
+             attention_mask=None,training=False):
         """
         Predicts text fron vector and hidden states of base model

@@ -64,9 +70,20 @@ class QaracDecoderHead(keras.layers.Layer):
         Predicted text

         """
-        vectors = self.concat(inputs)
-        l0 = self.layer_0(vectors)
-        return self.head(self.layer1(l0.last_hidden_state[:,1:]))
+        vectors = self.concat(vector, hidden_states)
+        attentions = attention_mask if attention_mask is None else self.concat(tensorflow.ones((hidden_states.shape(0),
+                                                                                                 1)),
+                                                                                attention_mask)
+        l0 = self.layer_0(vectors,
+                          attentions,
+                          None,
+                          False,
+                          training)
+        return self.head(self.layer1(l0.last_hidden_state[:,1:],
+                                     attention_mask,
+                                     None,
+                                     False,
+                                     training))

 class QaracDecoderModel(transformers.TFPreTrainedModel,transformers.generation_tf_utils.TFGenerationMixin):

@@ -114,7 +131,8 @@ class QaracDecoderModel(transformers.TFPreTrainedModel,transformers.generation_tf_utils.TFGenerationMixin):
         (v,s) = (kwargs['vector'],inputs) if 'vector' in kwargs else inputs

         return self.decoder_head((tensorflow.expand_dims(v,1),
-                                  self.base_model(s).last_hidden_state))
+                                  self.base_model(s)),
+                                 training = kwargs.get('training',False))


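Two details in the QaracDecoderHead hunk may not run as written: TensorShape objects are indexed rather than called, so hidden_states.shape(0) would raise a TypeError, and if self.concat is a keras.layers.Concatenate layer it expects a single list of tensors rather than two positional arguments. A minimal sketch of the mask-extension step under those assumptions, with the mask shaped (batch, seq_len) and a column of ones prepended for the summary vector concatenated in front of the hidden states:

import tensorflow

def prepend_vector_mask(attention_mask):
    # attention_mask: (batch, seq_len), 1.0 for real tokens, 0.0 for padding.
    # Prepend a column of ones so the prepended summary vector is always attended to.
    batch_size = tensorflow.shape(attention_mask)[0]
    ones = tensorflow.ones((batch_size, 1), dtype=attention_mask.dtype)
    return tensorflow.concat([ones, attention_mask], axis=1)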
qarac/models/QaracEncoderModel.py CHANGED
@@ -45,7 +45,9 @@ class QaracEncoderModel(transformers.TFPreTrainedModel):
         """
         self.built=True

-    def call(self,inputs):
+    def call(self,input_ids,
+             attention_mask=None,
+             training=False):
         """
         Vectorizes a tokenised text

@@ -61,7 +63,11 @@ class QaracEncoderModel(transformers.TFPreTrainedModel):

         """

-        return self.head(self.base_model(inputs).last_hidden_state)
+        return self.head(self.base_model(input_ids,
+                                         attention_mask,
+                                         training=training).last_hidden_state,
+                         attention_mask,
+                         training)


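For context, a hedged sketch of the call pattern the new QaracEncoderModel signature mirrors, using the transformers TF RoBERTa classes directly; roberta-base is only an illustrative checkpoint, not necessarily the one this project trains from:

import transformers

tokenizer = transformers.RobertaTokenizer.from_pretrained('roberta-base')
base_model = transformers.TFRobertaModel.from_pretrained('roberta-base')

# Tokenise a small batch with padding, comparable to what CombinedCorpus now returns
tokens = tokenizer(['A tokenised text', 'Another, rather longer tokenised text'],
                   padding=True,
                   return_tensors='tf')
outputs = base_model(tokens['input_ids'],
                     attention_mask=tokens['attention_mask'],
                     training=False)
print(outputs.last_hidden_state.shape)  # (batch_size, seq_len, hidden_size)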
qarac/models/layers/GlobalAttentionPoolingHead.py CHANGED
@@ -57,7 +57,7 @@ class GlobalAttentionPoolingHead(keras.layers.Layer):
                                          self.local_projection,
                                          axes=1)

-    def call(self,X,training=None):
+    def call(self,X,attention_mask=None,training=None):
         """


@@ -65,6 +65,8 @@ class GlobalAttentionPoolingHead(keras.layers.Layer):
         ----------
         X : tensorflow.Tensor
             Base model vectors to apply pooling to.
+        attention_mask: tensorflow.Tensor, optional
+            mask for pad values
         training : bool, optional
             Not used. The default is None.

@@ -83,5 +85,7 @@ class GlobalAttentionPoolingHead(keras.layers.Layer):
                                                X),
                                   axis=2)
         attention = tensorflow.vectorized_map(dot_prod,(lp,gp))
+        if attention_mask is None:
+            attention_mask = tensorflow.ones_like(attention)
         return tensorflow.vectorized_map(dot_prod,
-                                         (attention,X))
+                                         (attention * attention_mask,X))
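The pooling change zeroes attention scores at padded positions before the final weighted sum. A standalone sketch of that idea, with assumed shapes and a plain einsum in place of the layer's vectorized_map and trained projections:

import tensorflow

def masked_attention_pool(X, attention, attention_mask=None):
    # X: (batch, seq_len, width) base-model vectors
    # attention: (batch, seq_len) attention scores
    # attention_mask: (batch, seq_len), 1.0 for real tokens, 0.0 for padding
    if attention_mask is None:
        attention_mask = tensorflow.ones_like(attention)
    weights = attention * attention_mask  # padded positions contribute nothing
    return tensorflow.einsum('bs,bsw->bw', weights, X)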