Files changed (1)
  1. modelling_RW.py +3 -3
modelling_RW.py CHANGED
@@ -76,8 +76,8 @@ class RotaryEmbedding(torch.nn.Module):
76
  freqs = torch.einsum("i,j->ij", t, self.inv_freq)
77
  emb = torch.cat((freqs, freqs), dim=-1).to(device)
78
 
79
- if dtype in [torch.float16, torch.bfloat16]:
80
- emb = emb.float()
81
 
82
  self.cos_cached = emb.cos()[None, :, :]
83
  self.sin_cached = emb.sin()[None, :, :]
@@ -89,7 +89,7 @@ class RotaryEmbedding(torch.nn.Module):
89
 
90
  def forward(self, q, k):
91
  batch, seq_len, head_dim = q.shape
92
- cos, sin = self.cos_sin(seq_len, q.device)
93
  return (q * cos) + (rotate_half(q) * sin), (k * cos) + (rotate_half(k) * sin)
94
 
95
 
 
76
  freqs = torch.einsum("i,j->ij", t, self.inv_freq)
77
  emb = torch.cat((freqs, freqs), dim=-1).to(device)
78
 
79
+ if dtype != emb.dtype:
80
+ emb = emb.to(dtype)
81
 
82
  self.cos_cached = emb.cos()[None, :, :]
83
  self.sin_cached = emb.sin()[None, :, :]
 
89
 
90
  def forward(self, q, k):
91
  batch, seq_len, head_dim = q.shape
92
+ cos, sin = self.cos_sin(seq_len, q.device, q.dtype)
93
  return (q * cos) + (rotate_half(q) * sin), (k * cos) + (rotate_half(k) * sin)
94
 
95