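convert.py: hardcode the hyperparameters for OpenLLaMA 3B. The heuristics in Params.guessed() (n_mult = 256, n_head = n_embd // 128, n_layer counted from the checkpoint's tensor names) do not hold for this model, so checkpoints with n_embd == 3200 are special-cased to n_mult = 216, n_head = 32, n_layer = 26; the Hugging Face conversion path gets the same n_head fix.
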
--- a/convert.py	2023-05-30 20:48:07.687486627 +0300
+++ b/convert.py	2023-05-30 20:47:55.854142065 +0300
@@ -143,12 +143,22 @@
     def guessed(model: 'LazyModel', file_type: GGMLFileType) -> 'Params':
         n_vocab, n_embd = model["tok_embeddings.weight"].shape
 
+        n_mult = 256
+        n_head = n_embd // 128
+        n_layer = next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
+
+        # TODO: hack for open_llama_3b
+        if n_embd == 3200:
+            n_mult = 216
+            n_head = 32
+            n_layer = 26
+
         return Params(
             n_vocab=n_vocab,
             n_embd=n_embd,
-            n_mult=256,
-            n_head=n_embd // 128,
-            n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model),
+            n_mult=n_mult,
+            n_head=n_head,
+            n_layer=n_layer,
             file_type=file_type,
         )
 
@@ -597,7 +607,9 @@
     out["norm.weight"] = model["model.norm.weight"]
     out["output.weight"] = model["lm_head.weight"]
 
-    n_head = model["model.layers.0.self_attn.q_proj.weight"].shape[1] // 128
+    # TODO: hack for open_llama_3b
+    n_embd = model["model.layers.0.self_attn.q_proj.weight"].shape[1]
+    n_head = 32 if n_embd == 3200 else n_embd // 128
     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
             break
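
A note on the magic numbers (not part of the patch): llama.cpp does not store the feed-forward width in the model header; it reconstructs it at load time by rounding 2/3 of 4 * n_embd up to a multiple of n_mult. With the default n_mult = 256 that rounding gives 8704 for n_embd = 3200, while n_mult = 216 lands exactly on OpenLLaMA 3B's published intermediate size of 8640. Likewise, the n_embd // 128 heuristic assumes 128-dimensional heads, but OpenLLaMA 3B uses 32 heads of dimension 100, hence the hardcoded n_head = 32. A minimal sketch of the rounding rule (the helper name n_ff is ours, and the formula mirrors the one llama.cpp used at the time):

    def n_ff(n_embd: int, n_mult: int) -> int:
        # Round 2/3 of 4*n_embd up to the nearest multiple of n_mult,
        # mirroring how llama.cpp reconstructs the feed-forward width.
        return ((2 * (4 * n_embd) // 3 + n_mult - 1) // n_mult) * n_mult

    print(n_ff(4096, 256))  # 11008 -- LLaMA 7B, as expected
    print(n_ff(3200, 256))  # 8704  -- wrong for OpenLLaMA 3B
    print(n_ff(3200, 216))  # 8640  -- matches OpenLLaMA 3B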