Update modeling_opt.py

modeling_opt.py  (+5 -4)
@@ -133,7 +133,8 @@ def softmax_1(input: torch.Tensor, dim=-1, dtype=torch.float32) -> torch.Tensor:
     """
     $\text(softmax)_n(x_i) = exp(x_i) / (1 + \sum_j exp(x_j))$
     """
-
+    output = softmax_n_shifted_zeros(input, 1, dim=dim)
+    return output if dtype is None else output.type(dtype=dtype)
 
 
 class OPTAttention(nn.Module):
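
For reference, a minimal sketch of what the two added lines compute, going only by the docstring formula above (a softmax with an extra 1 in the denominator, equivalent to appending an implicit all-zero logit). `softmax_n_shifted_zeros` itself is defined elsewhere in this repo; the name `softmax_1_reference` below is made up for illustration and is not part of this commit:

import torch

def softmax_1_reference(x: torch.Tensor, dim: int = -1) -> torch.Tensor:
    # exp(x_i) / (1 + sum_j exp(x_j)), computed with a max-shift so exp()
    # cannot overflow; clamping the shift at 0 keeps the "1" term represented
    # exactly as exp(-m) whenever m > 0.
    m = x.max(dim=dim, keepdim=True).values.clamp(min=0)
    exp_x = torch.exp(x - m)
    return exp_x / (torch.exp(-m) + exp_x.sum(dim=dim, keepdim=True))

Unlike the standard softmax, the outputs along `dim` sum to at most 1 and approach 0 when every logit is strongly negative, which appears to be the point of swapping it in for the attention normalization below.
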
@@ -151,7 +152,7 @@ class OPTAttention(nn.Module):
         self.num_heads = config.num_attention_heads
         self.dropout = config.attention_dropout
         self.enable_bias = config.enable_bias
-
+        self.attention = nn.functional.softmax
         self.head_dim = self.embed_dim // self.num_heads
         self.is_causal = True
 
@@ -327,7 +328,7 @@ class OPTOutEffHop(OPTAttention):
         self.num_heads = config.num_attention_heads
         self.dropout = config.attention_dropout
         self.enable_bias = config.enable_bias
-
+        self.attention = softmax_1
         self.head_dim = self.embed_dim // self.num_heads
         self.is_causal = True
 
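
The new `self.attention` attribute is not called anywhere in this diff; presumably the forward pass uses it in place of a hard-coded `nn.functional.softmax`, so that `OPTAttention` keeps the standard normalization while `OPTOutEffHop` gets the shifted one through the same call site. A small, self-contained illustration of why the two callables are interchangeable there (reusing the hypothetical `softmax_1_reference` sketch above):

import torch
import torch.nn as nn

scores = torch.randn(2, 4, 8, 8)                    # (batch, heads, tgt_len, src_len); made-up shapes
standard = nn.functional.softmax(scores, dim=-1)    # rows sum to exactly 1
shifted = softmax_1_reference(scores, dim=-1)       # rows sum to strictly less than 1
print(standard.sum(-1).mean().item(), shifted.sum(-1).mean().item())
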
@@ -488,7 +489,7 @@ class OPTOutEffHop(OPTAttention):
         return attn_output, attn_weights_reshaped, past_key_value
 
 
-class OptFlashAttention2(OPTAttention):
+class OptFlashAttention2(OPTOutEffHop):
     """
     OPT flash attention module. This module inherits from `OPTAttention` as the weights of the module stays untouched.
     The only required change would be on the forward pass where it needs to correctly call the public API of flash
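
Changing the base class from `OPTAttention` to `OPTOutEffHop` means `OptFlashAttention2.__init__` now runs through `OPTOutEffHop.__init__`, so, assuming the flash-attention class does not reassign the attribute itself, its instances also end up with `self.attention = softmax_1` rather than `nn.functional.softmax`.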