Crystalcareai
committed on
Update modeling_gemmoe.py
modeling_gemmoe.py +5 -0
modeling_gemmoe.py
CHANGED
@@ -1215,6 +1215,10 @@ class GemmoeForCausalLM(GemmoePreTrainedModel):
         )

         hidden_states = outputs[0]
+
+        # Ensure hidden_states and lm_head have compatible dtypes
+        hidden_states = hidden_states.to(dtype=self.lm_head.weight.dtype)
+
         logits = self.lm_head(hidden_states)
         logits = logits.float()

@@ -1332,6 +1336,7 @@ class GemmoeForCausalLM(GemmoePreTrainedModel):
             tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past),
         )
         return reordered_past
+
 @add_start_docstrings(
     """
     The Gemmoe Model transformer with a sequence classification head on top (linear layer).
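The added cast guards against a dtype mismatch between the backbone's activations and the lm_head weights (for example a float16 backbone paired with a float32 head), which would otherwise make the linear layer fail at matmul time. A minimal sketch of the failure mode and the fix, using illustrative shapes rather than the actual Gemmoe modules:

import torch
import torch.nn as nn

# Illustrative shapes only; not the real Gemmoe config.
hidden_states = torch.randn(1, 4, 8, dtype=torch.float16)  # e.g. output of a fp16 backbone
lm_head = nn.Linear(8, 16, bias=False)                     # weights default to float32

# Calling lm_head(hidden_states) here would raise a dtype-mismatch
# RuntimeError, since the input and weight dtypes differ.

# The commit's fix: align the activations with the head's weight dtype.
hidden_states = hidden_states.to(dtype=lm_head.weight.dtype)
logits = lm_head(hidden_states)
logits = logits.float()  # same upcast as in the surrounding code
print(logits.dtype)      # torch.float32

Casting the activations to the head's dtype, rather than converting the head itself, leaves the weights untouched, and the existing logits.float() upcast still runs afterward so downstream loss computation stays in float32.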