Raghavan
/

indictrans2-indic-en-dist-200M

text2text-generation

Model card Files Files and versions Community

Raghavan committed on Dec 6, 2023

Commit

814f361

•

1 Parent(s): 359d319

Upload 7 files

Files changed (1) hide show

modeling_indictrans.py +13 -0

modeling_indictrans.py CHANGED Viewed

@@ -606,6 +606,17 @@ class IndicTransEncoder(IndicTransPreTrainedModel):
         # Initialize weights and apply final processing
         self.post_init()
     def forward(
         self,
         input_ids: Optional[torch.Tensor] = None,
@@ -745,6 +756,8 @@ class IndicTransEncoder(IndicTransPreTrainedModel):
         if output_hidden_states:
             encoder_states = encoder_states + (hidden_states,)
         if not return_dict:
             return tuple(v for v in [hidden_states, encoder_states, all_attentions] if v is not None)
         return BaseModelOutput(

         # Initialize weights and apply final processing
         self.post_init()
+    def get_pooled_representation(self, hidden_states, attention_mask):  # masked mean of hidden_states over dim 1, one pooled vector per leading index
+        seqs = torch.clone(hidden_states)  # work on a copy so the caller's tensor is not modified by the in-place masking below
+        seqs[attention_mask == 0] = 0  # zero every position whose mask entry is 0; NOTE(review): presumably the raw 0/1 padding mask, not an expanded additive mask — confirm at the call site
+        sentence_embedding = seqs.sum(dim=1)  # sum over dim 1; assumes dim 1 is the sequence axis — TODO confirm
+        weights = 1.0 / ((attention_mask != 0).float().sum(dim=1) + 1e-7)  # reciprocal of the non-zero mask count per row; 1e-7 keeps the denominator non-zero for an all-zero row
+        sentence_embedding = torch.einsum(  # scale slice i of the summed tensor by weights[i]
+            "i...,i ->i...", sentence_embedding, weights  # second operand is indexed by `i` only, so weights must be 1-D (i.e. a 2-D attention_mask)
+        )
+        return sentence_embedding
     def forward(
         self,
         input_ids: Optional[torch.Tensor] = None,
         if output_hidden_states:
             encoder_states = encoder_states + (hidden_states,)
+        hidden_states = self.get_pooled_representation(hidden_states, attention_mask)
         if not return_dict:
             return tuple(v for v in [hidden_states, encoder_states, all_attentions] if v is not None)
         return BaseModelOutput(