SpiridonSunRotator
committed on
Commit
·
89d7ce1
1
Parent(s):
6e61c89
Update modelling_RW.py
Browse files
I've observed that loading this model in float16 or bfloat16 leads to the following error:
```
modelling_RW.py", line 289, in forward
attn_output = F.scaled_dot_product_attention(
RuntimeError: Expected query, key, and value to have the same dtype, but got query.dtype: float key.dtype: float and value.dtype: c10::Half instead.
```
This happens because the output of the `cos_sin` method of the `RotaryEmbedding` class is `float32`.
In this commit I propose a simple fix so that the model works correctly in half precision.
- modelling_RW.py +7 -2
modelling_RW.py
CHANGED
@@ -76,8 +76,8 @@ class RotaryEmbedding(torch.nn.Module):
|
|
76 |
freqs = torch.einsum("i,j->ij", t, self.inv_freq)
|
77 |
emb = torch.cat((freqs, freqs), dim=-1).to(device)
|
78 |
|
79 |
-
if dtype
|
80 |
-
emb = emb.
|
81 |
|
82 |
self.cos_cached = emb.cos()[None, :, :]
|
83 |
self.sin_cached = emb.sin()[None, :, :]
|
@@ -87,6 +87,11 @@ class RotaryEmbedding(torch.nn.Module):
|
|
87 |
|
88 |
return self.cos_cached, self.sin_cached
|
89 |
|
|
|
|
|
|
|
|
|
|
|
90 |
def forward(self, q, k):
|
91 |
batch, seq_len, head_dim = q.shape
|
92 |
cos, sin = self.cos_sin(seq_len, q.device)
|
|
|
76 |
freqs = torch.einsum("i,j->ij", t, self.inv_freq)
|
77 |
emb = torch.cat((freqs, freqs), dim=-1).to(device)
|
78 |
|
79 |
+
if dtype != emb.dtype:
|
80 |
+
emb = emb.to(dtype)
|
81 |
|
82 |
self.cos_cached = emb.cos()[None, :, :]
|
83 |
self.sin_cached = emb.sin()[None, :, :]
|
|
|
87 |
|
88 |
return self.cos_cached, self.sin_cached
|
89 |
|
90 |
+
def forward(self, q, k):
|
91 |
+
batch, seq_len, head_dim = q.shape
|
92 |
+
cos, sin = self.cos_sin(seq_len, q.device, q.dtype)
|
93 |
+
return (q * cos) + (rotate_half(q) * sin), (k * cos) + (rotate_half(k) * sin)
|
94 |
+
|
95 |
def forward(self, q, k):
|
96 |
batch, seq_len, head_dim = q.shape
|
97 |
cos, sin = self.cos_sin(seq_len, q.device)
|