Jackmin108 committed
merge changes
Signed-off-by: Meow <[email protected]>
- embedding.py +0 -2
- mha.py +1 -0
- modeling_xlm_roberta.py +0 -1
embedding.py
CHANGED
@@ -59,7 +59,6 @@ class XLMRobertaEmbeddings(nn.Module):
                 embeddings[task_indices] = task_embeddings
         else:
             embeddings = self.word_embeddings(input_ids)
-
         if self.max_position_embeddings > 0:
             if position_ids is None:
                 position_ids = create_position_ids_from_input_ids(input_ids, padding_idx=self.word_embeddings.padding_idx).to(input_ids.device)
@@ -79,5 +78,4 @@ class XLMRobertaEmbeddings(nn.Module):
             else:
                 token_type_embeddings = self.token_type_embeddings(token_type_ids)
                 embeddings = embeddings + token_type_embeddings
-
         return embeddings
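The embedding.py hunks only drop blank lines; the surrounding position-id logic is unchanged. For context, create_position_ids_from_input_ids is the conventional RoBERTa-style helper that numbers non-padding tokens starting from padding_idx + 1. A minimal sketch of that convention (mirroring the Hugging Face transformers implementation, shown for reference rather than taken from this file):

import torch

def create_position_ids_from_input_ids(input_ids, padding_idx):
    # Non-padding tokens receive positions padding_idx + 1, padding_idx + 2, ...
    # while padding tokens keep position padding_idx, as in RoBERTa.
    mask = input_ids.ne(padding_idx).int()
    incremental_indices = torch.cumsum(mask, dim=1).type_as(mask) * mask
    return incremental_indices.long() + padding_idx

ids = torch.tensor([[5, 7, 9, 1, 1]])  # 1 is the padding index in this toy example
print(create_position_ids_from_input_ids(ids, padding_idx=1))
# tensor([[2, 3, 4, 1, 1]])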
mha.py
CHANGED
@@ -643,6 +643,7 @@ class MHA(nn.Module):
             inference_params.max_sequence_len if inference_params is not None else max_seqlen
         )
         batch, seqlen = x.shape[:2]
+        lora_kwargs = {}
         if not self.cross_attn and self.num_heads_kv == self.num_heads:
             assert x_kv is None and mixer_subset is None

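The only functional change in mha.py is initialising an empty lora_kwargs dict before the projection branches; where it is filled and consumed lies outside this diff. A self-contained toy sketch of the pattern it suggests is below: build keyword arguments conditionally and hand them to a LoRA-aware projection. Every name here (LoRAAwareLinear, task_ids, adapter_mask) is an illustrative assumption, not an identifier from mha.py:

import torch
import torch.nn as nn

class LoRAAwareLinear(nn.Linear):
    """Toy projection that tolerates extra keyword arguments such as per-sample task ids."""
    def forward(self, x, task_ids=None):
        out = super().forward(x)
        # A real LoRA layer would add task-specific low-rank updates here based on task_ids.
        return out

proj = LoRAAwareLinear(8, 24)
x = torch.randn(2, 5, 8)
adapter_mask = torch.tensor([0, 1])  # hypothetical per-sample task ids

lora_kwargs = {}
if adapter_mask is not None:
    lora_kwargs["task_ids"] = adapter_mask  # only populated when adapters are in use
qkv = proj(x, **lora_kwargs)
print(qkv.shape)  # torch.Size([2, 5, 24])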
modeling_xlm_roberta.py
CHANGED
@@ -213,7 +213,6 @@ class XLMRobertaEncoder(nn.Module):
             mixer_kwargs = {'adapter_mask': adapter_mask}
             if key_padding_mask is not None:
                 mixer_kwargs['key_padding_mask'] = key_padding_mask.bool()
-
             for layer in self.layers:
                 if self._grad_checkpointing:
                     hidden_states = torch.utils.checkpoint.checkpoint(
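The modeling_xlm_roberta.py hunk again removes only a blank line; mixer_kwargs (adapter_mask and, when present, key_padding_mask) still reaches every layer whether or not gradient checkpointing is active. A self-contained sketch of that dispatch pattern follows, using a placeholder ToyBlock because the real block's forward signature is not shown in this diff:

import torch
import torch.nn as nn
import torch.utils.checkpoint

class ToyBlock(nn.Module):
    """Stand-in for an encoder block whose forward accepts extra mixer kwargs."""
    def __init__(self, dim):
        super().__init__()
        self.lin = nn.Linear(dim, dim)

    def forward(self, hidden_states, mixer_kwargs=None):
        # A real block would forward mixer_kwargs (adapter_mask, key_padding_mask, ...)
        # to its attention module; here we just apply a linear layer.
        return self.lin(hidden_states)

layers = nn.ModuleList([ToyBlock(16) for _ in range(2)])
hidden_states = torch.randn(2, 4, 16, requires_grad=True)
mixer_kwargs = {"adapter_mask": torch.tensor([0, 1])}
grad_checkpointing = True

for layer in layers:
    if grad_checkpointing:
        # Keyword arguments pass straight through checkpoint when use_reentrant=False.
        hidden_states = torch.utils.checkpoint.checkpoint(
            layer, hidden_states, mixer_kwargs=mixer_kwargs, use_reentrant=False
        )
    else:
        hidden_states = layer(hidden_states, mixer_kwargs=mixer_kwargs)

print(hidden_states.shape)  # torch.Size([2, 4, 16])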