Chengxu Zhuang
committed
Commit 272b3c6
1 Parent(s): ee546e1
minor fix for causal mask
modeling_flamingo.py  +14 -3
modeling_flamingo.py CHANGED
@@ -14,6 +14,12 @@ import transformers.models.opt.modeling_opt as modeling_opt
 from transformers.models.opt.modeling_opt\
     import OPTDecoderLayer, OPTPreTrainedModel, OPTConfig
 from transformers import ViTModel
+
+try:
+    from transformers.models.opt.modeling_opt import _prepare_4d_causal_attention_mask
+except:
+    _prepare_4d_causal_attention_mask = None
+
 from .utils import exists, freeze_all_layers_, unfreeze_all_layers_
 from .flamingo_pytorch import GatedCrossAttentionBlock, PerceiverResampler
 from .configuration_flamingo import FlamingoConfig
@@ -232,9 +238,14 @@ class OPTDecoder(modeling_opt.OPTDecoder):
         attention_mask = torch.ones(inputs_embeds.shape[:2], dtype=torch.bool, device=inputs_embeds.device)
         pos_embeds = self.embed_positions(attention_mask, past_key_values_length)

-        attention_mask = self._prepare_decoder_attention_mask(
-            attention_mask, input_shape, inputs_embeds, past_key_values_length
-        )
+        if _prepare_4d_causal_attention_mask is None:
+            attention_mask = self._prepare_decoder_attention_mask(
+                attention_mask, input_shape, inputs_embeds, past_key_values_length
+            )
+        else:
+            attention_mask = _prepare_4d_causal_attention_mask(
+                attention_mask, input_shape, inputs_embeds, past_key_values_length
+            )

         if self.project_in is not None:
             inputs_embeds = self.project_in(inputs_embeds)
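
Note (not part of the commit): newer transformers releases dropped the OPTDecoder._prepare_decoder_attention_mask method and instead build the causal mask with the module-level helper _prepare_4d_causal_attention_mask, which is why the import above is wrapped in try/except. The sketch below is a minimal illustration of that fallback pattern and of what the helper returns; it assumes a transformers version (roughly 4.35 or later) that exposes the helper under transformers.modeling_attn_mask_utils, a different import path from the modeling_opt re-export used in the commit.

    # Minimal sketch of the version-guarded import and of the 4D causal mask it builds.
    # The modeling_attn_mask_utils path is an assumption about newer transformers
    # releases; on older releases the except branch leaves the name as None.
    import torch

    try:
        from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask
    except ImportError:
        _prepare_4d_causal_attention_mask = None

    if _prepare_4d_causal_attention_mask is not None:
        batch, seq_len, hidden = 2, 5, 8
        inputs_embeds = torch.zeros(batch, seq_len, hidden)
        attention_mask = torch.ones(batch, seq_len)  # 2D padding mask, 1 = attend

        # Expands the 2D padding mask into a 4D additive causal mask of shape
        # (batch, 1, seq_len, seq_len): 0 where attention is allowed, a large
        # negative value for future (above-diagonal) positions.
        causal_mask = _prepare_4d_causal_attention_mask(
            attention_mask, (batch, seq_len), inputs_embeds, past_key_values_length=0
        )
        print(causal_mask.shape)  # torch.Size([2, 1, 5, 5])

When the import fails on an older transformers release, the name stays None and the decoder falls back to its own _prepare_decoder_attention_mask, exactly as in the second hunk above.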