moidhassan committed · verified
Commit 5b7216f · 1 Parent(s): 794ffcf

Fix for "RuntimeError: FlashAttention only support fp16 and bf16 data type" during fine-tuning.


Updating the positional_embedding code as suggested here: https://huggingface.co/microsoft/Phi-3-small-8k-instruct/discussions/11#6659a17e8b11da966e8e510c

Files changed (1)
  1. positional_embedding.py +2 -2
positional_embedding.py CHANGED
@@ -269,10 +269,10 @@ class RotaryEmbedding(torch.nn.Module):
         return (
             apply_rotary_pos_emb(
                 q, cos_cached[seqlen_offset:seq_len], sin_cached[seqlen_offset:seq_len], seq_dimension=seq_dimension
-            ),
+            ).to(q.dtype),
             apply_rotary_pos_emb(
                 k, cos_cached[seqlen_offset:seq_len], sin_cached[seqlen_offset:seq_len], seq_dimension=seq_dimension
-            ),
+            ).to(q.dtype),
         )
 
     @classmethod
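
The root cause appears to be PyTorch's type-promotion rules: when the rotary cos/sin caches are held in float32, multiplying the fp16/bf16 queries and keys by them upcasts the result to float32, which flash-attn then rejects with the RuntimeError above. Casting the result back with .to(q.dtype) keeps the tensors in a FlashAttention-compatible dtype. A minimal sketch of the effect (tensor shapes and names here are illustrative, not taken from the model code):

import torch

# Query in bf16, as FlashAttention requires; a rotary cos cache kept in fp32 for precision.
q = torch.randn(1, 8, 64, dtype=torch.bfloat16)
cos_cached = torch.randn(8, 64, dtype=torch.float32)

# PyTorch type promotion upcasts the product to float32 ...
rotated = q * cos_cached
print(rotated.dtype)  # torch.float32 -> the dtype FlashAttention rejects

# ... so the patch casts the result back to the query's original dtype.
rotated = (q * cos_cached).to(q.dtype)
print(rotated.dtype)  # torch.bfloat16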