Upload InternLM2ForCausalLM

modeling_internlm2.py CHANGED (+1 -1)
@@ -390,7 +390,7 @@ class InternLM2Attention(nn.Module):
         # attn_weights_i = attn_weights_i + causal_mask_i

         # # Upcast attention to fp32 and apply softmax
-        # attn_weights_i =
+        # attn_weights_i = nn.functional.softmax(attn_weights_i, dim=-1, dtype=torch.float32).to(query_states_i.dtype)
         # attn_output_i = torch.matmul(attn_weights_i, value_states.to(device))

         # return attn_output_i
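For context, the commented-out lines in this hunk follow the standard eager attention pattern: add the causal mask to the raw scores, upcast to fp32 for a numerically stable softmax, cast back to the query dtype, and multiply with the value states. The sketch below illustrates that pattern in a self-contained form; the function name, shapes, and the scaling step are illustrative assumptions and are not taken from modeling_internlm2.py, which keeps this code commented out and uses per-index `_i` tensors and an explicit `.to(device)`.

import torch
from torch import nn

def eager_attention_sketch(query_states_i, key_states, value_states, causal_mask_i):
    # Raw attention scores scaled by sqrt(head_dim) (scaling assumed, not shown in the diff).
    head_dim = query_states_i.size(-1)
    attn_weights_i = torch.matmul(query_states_i, key_states.transpose(-2, -1)) / head_dim**0.5

    # Apply the additive causal mask, as in the commented-out line.
    attn_weights_i = attn_weights_i + causal_mask_i

    # Upcast attention to fp32 for the softmax, then cast back to the query dtype.
    attn_weights_i = nn.functional.softmax(attn_weights_i, dim=-1, dtype=torch.float32).to(query_states_i.dtype)

    # Weighted sum over the value states.
    attn_output_i = torch.matmul(attn_weights_i, value_states)
    return attn_output_i

# Illustrative shapes: (batch, num_heads, seq_len, head_dim)
q = torch.randn(1, 8, 16, 64, dtype=torch.float16)
k = torch.randn(1, 8, 16, 64, dtype=torch.float16)
v = torch.randn(1, 8, 16, 64, dtype=torch.float16)
mask = torch.triu(torch.full((16, 16), float("-inf")), diagonal=1).to(torch.float16)
out = eager_attention_sketch(q, k, v, mask)  # -> shape (1, 8, 16, 64)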