Update to 1T token final version
Changed files:
- .gitignore +6 -0
- Makefile +42 -0
- README.md +7 -2
- SHA256SUMS +6 -6
- convert.py.diff +39 -0
- open-llama-3b-f16.bin +1 -1
- open-llama-3b-q4_0.bin +1 -1
- open-llama-3b-q4_1.bin +1 -1
- open-llama-3b-q5_0.bin +1 -1
- open-llama-3b-q5_1.bin +1 -1
- open-llama-3b-q8_0.bin +1 -1
.gitignore
ADDED
@@ -0,0 +1,6 @@
+convert.py
+llama.cpp/
+pytorch_model.bin
+*.sha
+*.tar.gz
+tokenizer.model
Makefile
ADDED
@@ -0,0 +1,42 @@
+MODEL_NAME= open-llama-3b
+PYTHON?= python
+LLAMA_TAG= 5c64a09
+LLAMA_TAR= master-$(LLAMA_TAG).tar.gz
+HF_REPO= openlm-research/open_llama_3b
+HF_REF= main
+HF_FILES= pytorch_model.bin tokenizer.model
+$(HF_FILES): SITE= https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
+$(LLAMA_TAR): SITE= https://github.com/ggerganov/llama.cpp/archive/refs/tags
+
+FILES= $(HF_FILES) $(LLAMA_TAR)
+
+QUANTS= f16 q4_0 q4_1 q5_0 q5_1 q8_0
+MODEL_FILES= $(addsuffix .bin,$(addprefix $(MODEL_NAME)-,$(QUANTS)))
+
+.PHONY: all
+all: $(MODEL_FILES) SHA256SUMS
+
+$(FILES):
+	curl -L -o $@ --url $(SITE)/$@
+
+llama.cpp: $(LLAMA_TAR)
+	mkdir -p $@
+	tar -xf $< --strip-components=1 -C $@
+
+llama.cpp/quantize: llama.cpp
+	$(MAKE) -C llama.cpp quantize
+
+convert.py: convert.py.diff | llama.cpp
+	patch -ru llama.cpp/convert.py -i $< -o $@
+
+$(MODEL_NAME)-f16.bin: $(HF_FILES) | convert.py
+	$(PYTHON) convert.py --outtype f16 --outfile $@ .
+
+$(MODEL_NAME)-q%.bin: $(MODEL_NAME)-f16.bin | llama.cpp/quantize
+	llama.cpp/quantize $< $@ q$*
+
+%.sha: %
+	sha256sum $< > $@
+
+SHA256SUMS: $(addsuffix .sha,$(MODEL_FILES))
+	cat $^ > $@
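Running `make` in the repository root drives the whole pipeline: fetch the two Hugging Face files and the llama.cpp tarball, unpack and build the `quantize` tool, patch `convert.py`, write the f16 model, then derive each quantized variant and the checksum manifest. A minimal sketch of how the `$(MODEL_NAME)-q%.bin` pattern rule expands, with plain Python standing in for make's `$(addprefix)`/`$(addsuffix)` and the `%` stem match:

```python
# Sketch only: mirrors the Makefile's name expansion and pattern-rule stem.
quants = ["f16", "q4_0", "q4_1", "q5_0", "q5_1", "q8_0"]          # QUANTS
model_files = [f"open-llama-3b-{q}.bin" for q in quants]          # MODEL_FILES

# For a target like open-llama-3b-q4_0.bin the stem `$*` is "4_0",
# so the recipe becomes: llama.cpp/quantize <f16 input> <target> q4_0
for q in quants[1:]:                      # every quantized variant
    stem = q[1:]                          # what `%` matches in $(MODEL_NAME)-q%.bin
    print(f"llama.cpp/quantize open-llama-3b-f16.bin open-llama-3b-q{stem}.bin q{stem}")
```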
README.md
CHANGED
@@ -4,11 +4,16 @@ license: apache-2.0
 
 # ggml versions of OpenLLaMa 3B
 
-- Version:
+- Version: 1T tokens final version
 - Project: [OpenLLaMA: An Open Reproduction of LLaMA](https://github.com/openlm-research/open_llama)
-- Model: [openlm-research/
+- Model: [openlm-research/open_llama_3b](https://huggingface.co/openlm-research/open_llama_3b)
 - [llama.cpp](https://github.com/ggerganov/llama.cpp): build 607(ffb06a3) or later
 
 ## Use with llama.cpp
 
 Support is now merged to master branch.
+
+## Newer quantizations
+
+There are now more quantization types in llama.cpp, some lower than 4 bits.
+Currently these are not supported, maybe because some weights have shapes that don't divide by 256.
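That last point is worth a quick arithmetic check. The newer k-quant formats in llama.cpp pack weights into 256-element super-blocks, while the classic q4_0/q5_0/q8_0 formats used here work in 32-element blocks; with this model's hidden size of 3200 (see the convert.py hack below), the 256-wide blocks don't fit evenly:

```python
# Sketch: why the k-quant (super-block) types are assumed not to fit open_llama_3b.
n_embd = 3200          # hidden size of open_llama_3b
print(n_embd % 256)    # 128 -> a 3200-wide row is not a whole number of 256-element blocks
print(n_embd % 32)     # 0   -> the 32-element block formats used here still divide evenly
```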
SHA256SUMS
CHANGED
@@ -1,6 +1,6 @@
-
-
-
-
-
-
+f123887011114da3a9b3ffb06b6fc695aaecf0a5503c38e1589379f7304f37f6  open-llama-3b-f16.bin
+ec0460668c7fa50eee4d96b8687dbb29fa42b91c634151fe9c0c53cace0ab81a  open-llama-3b-q4_0.bin
+15325aeae94da8886ad94ac46491cd103111e82d3be910aaaaf7c3fa0bc3c128  open-llama-3b-q4_1.bin
+87d6146b47177611f8c4529fb6186a093c512bd09050e1625819e82a1946e2da  open-llama-3b-q5_0.bin
+ce5de27bfccd02a34465a6d8a80ac8ad0baff186e600c29f6e3c6740f5a2dfd4  open-llama-3b-q5_1.bin
+9bbe718478161752fcc085f4c7393a6472c0b4b003c43b681208617241884d7d  open-llama-3b-q8_0.bin
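After a build, the files can be verified against this manifest with `sha256sum -c SHA256SUMS`, or equivalently in Python on platforms without coreutils:

```python
# Sketch: verify the model files against SHA256SUMS (like `sha256sum -c`).
import hashlib

with open("SHA256SUMS") as manifest:
    for line in manifest:
        digest, name = line.split()
        h = hashlib.sha256()
        with open(name, "rb") as f:
            for chunk in iter(lambda: f.read(1 << 20), b""):  # stream; files are GBs
                h.update(chunk)
        print(name, "OK" if h.hexdigest() == digest else "FAILED")
```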
convert.py.diff
ADDED
@@ -0,0 +1,39 @@
+--- a/convert.py	2023-05-30 20:48:07.687486627 +0300
++++ b/convert.py	2023-05-30 20:47:55.854142065 +0300
+@@ -143,12 +143,22 @@
+     def guessed(model: 'LazyModel', file_type: GGMLFileType) -> 'Params':
+         n_vocab, n_embd = model["tok_embeddings.weight"].shape
+ 
++        n_mult=256
++        n_head=n_embd // 128
++        n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
++
++        # TODO: hack for open_llama_3b
++        if n_embd == 3200:
++            n_mult = 216
++            n_head = 32
++            n_layer = 26
++
+         return Params(
+             n_vocab=n_vocab,
+             n_embd=n_embd,
+-            n_mult=256,
+-            n_head=n_embd // 128,
+-            n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model),
++            n_mult=n_mult,
++            n_head=n_head,
++            n_layer=n_layer,
+             file_type=file_type,
+         )
+ 
+@@ -597,7 +607,9 @@
+     out["norm.weight"] = model["model.norm.weight"]
+     out["output.weight"] = model["lm_head.weight"]
+ 
+-    n_head = model["model.layers.0.self_attn.q_proj.weight"].shape[1] // 128
++    # TODO: hack for open_llama_3b
++    n_embd = model["model.layers.0.self_attn.q_proj.weight"].shape[1]
++    n_head = 32 if n_embd == 3200 else n_embd // 128
+     for i in itertools.count():
+         if f"model.layers.{i}.self_attn.q_proj.weight" not in model:
+             break
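The constants in this hack can be sanity-checked against open_llama_3b's published shapes (hidden size 3200, feed-forward size 8640, 32 attention heads, 26 layers). The generic guesses assume 128-dimensional heads and an FFN size rounded up to a multiple of `n_mult`, and both fail for this model; the sketch below redoes the arithmetic, where the `n_ff` helper is my assumption of the rounding formula llama.cpp used around this tag:

```python
# Sketch: checking the hack's magic numbers against open_llama_3b's shapes.
n_embd = 3200

# Heads: the default guess n_embd // 128 assumes 128-dim heads; this model
# has 100-dim heads, so n_head must be pinned to 32.
print(n_embd // 128)    # 25  -> the wrong generic guess
print(n_embd // 32)     # 100 -> per-head dim once n_head is forced to 32

# FFN: assumed reconstruction n_ff = round_up(2 * (4 * n_embd) // 3, n_mult)
def n_ff(n_embd: int, n_mult: int) -> int:
    return ((2 * (4 * n_embd) // 3 + n_mult - 1) // n_mult) * n_mult

print(n_ff(3200, 256))  # 8704 -> the default n_mult=256 misses the real FFN size
print(n_ff(3200, 216))  # 8640 -> n_mult=216 reproduces it exactly, hence the hack
```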
open-llama-3b-f16.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f123887011114da3a9b3ffb06b6fc695aaecf0a5503c38e1589379f7304f37f6
 size 6853758208
open-llama-3b-q4_0.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ec0460668c7fa50eee4d96b8687dbb29fa42b91c634151fe9c0c53cace0ab81a
 size 1928446208
open-llama-3b-q4_1.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:15325aeae94da8886ad94ac46491cd103111e82d3be910aaaaf7c3fa0bc3c128
 size 2142590208
open-llama-3b-q5_0.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:87d6146b47177611f8c4529fb6186a093c512bd09050e1625819e82a1946e2da
 size 2356734208
open-llama-3b-q5_1.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ce5de27bfccd02a34465a6d8a80ac8ad0baff186e600c29f6e3c6740f5a2dfd4
 size 2570878208
open-llama-3b-q8_0.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9bbe718478161752fcc085f4c7393a6472c0b4b003c43b681208617241884d7d
 size 3641598208
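The six .bin entries above are Git LFS pointer files: the repository tracks only the `oid sha256:` digest and `size`, while the actual weights live in LFS storage (note the digests match the SHA256SUMS manifest). A small sketch, with a hypothetical helper, of reading one:

```python
# Sketch: parse a Git LFS pointer file like the open-llama-3b-q4_0.bin entry above.
def parse_lfs_pointer(text: str) -> dict:
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    assert fields["version"] == "https://git-lfs.github.com/spec/v1"
    return {"oid": fields["oid"].removeprefix("sha256:"), "size": int(fields["size"])}

ptr = parse_lfs_pointer(
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:ec0460668c7fa50eee4d96b8687dbb29fa42b91c634151fe9c0c53cace0ab81a\n"
    "size 1928446208\n"
)
print(ptr)  # digest matches the q4_0 line in SHA256SUMS
```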