Update llama_xformers_attention.py
llama_xformers_attention.py
@@ -57,8 +57,9 @@ class LlamaXFormersAttention(LlamaAttention):
         key_states = key_states.transpose(1, 2)
         value_states = value_states.transpose(1, 2)
 
-        #
-        #
+        # copied from https://github.com/oobabooga/text-generation-webui/pull/950/files
+        # This is a nasty hack. We know attention_mask in transformers is either LowerTriangular or all Zeros.
+        # We therefore check if one element in the upper triangular portion is zero. If it is, then the mask is all zeros.
         if attention_mask is None or attention_mask[0, 0, 0, 1] == 0:
             # input and output should be of form (bsz, q_len, num_heads, head_dim)
             attn_output = memory_efficient_attention(query_states, key_states, value_states, attn_bias=None)
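
For context, here is a minimal sketch of the pattern this patch documents, assuming xformers is installed. The standalone function name and the LowerTriangularMask fallback for the masked case are illustrative assumptions, not part of the PR itself:

# Minimal sketch of the mask heuristic above; not the PR's full attention class.
from xformers.ops import memory_efficient_attention, LowerTriangularMask

def xformers_attention(query_states, key_states, value_states, attention_mask=None):
    # All three tensors are (bsz, q_len, num_heads, head_dim), the layout xformers expects.
    # Heuristic from the diff: transformers builds attention_mask as either a
    # lower-triangular causal mask or all zeros, so a zero at an upper-triangular
    # position ([0, 0, 0, 1]) means the whole mask is zeros and no bias is needed.
    if attention_mask is None or attention_mask[0, 0, 0, 1] == 0:
        return memory_efficient_attention(query_states, key_states, value_states, attn_bias=None)
    # Illustrative fallback: use xformers' built-in causal bias instead of
    # materializing the dense mask.
    return memory_efficient_attention(query_states, key_states, value_states, attn_bias=LowerTriangularMask())

Checking a single element rather than the whole tensor keeps the branch cheap; the trade-off is that it relies on transformers only ever producing those two mask shapes.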