apple
/

OpenELM-1_1B

Fix for NaN Logits in the Hugging Face Distribution of OpenELM

by jasonkrone - opened Sep 13, 2024

←

Files changed (1) hide show

modeling_openelm.py CHANGED Viewed

@@ -766,7 +766,7 @@ class OpenELMModel(OpenELMPreTrainedModel):
             )
         # We use the current dtype to avoid any overflows
-        min_dtype = torch.finfo(dtype).min
         causal_mask = (
             self.causal_mask[None, None, :, :].repeat(batch_size, 1, 1, 1).to(dtype)
             * min_dtype

             )
         # We use the current dtype to avoid any overflows
+        min_dtype = torch.finfo(dtype).min / 2
         causal_mask = (
             self.causal_mask[None, None, :, :].repeat(batch_size, 1, 1, 1).to(dtype)
             * min_dtype