add files

Files changed (11) hide show

.gitattributes CHANGED Viewed

@@ -4,6 +4,7 @@
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text

 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
+*.gguf filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

+llama.cpp*/
+venv/
+pytorch_model.bin
+*.sha
+*.tar.gz
+tokenizer.model
+config.json
+tokenizer_config.json

Makefile ADDED Viewed

+# Base name shared by every generated model file.
+MODEL_NAME = open-llama-3b
+# Interpreter used to create the virtualenv; an existing value takes precedence.
+PYTHON ?= python
+# llama.cpp release tag number, plus the tarball and directory names derived from it.
+LLAMA_BUILD = 1132
+LLAMA_TAR = b$(LLAMA_BUILD).tar.gz
+LLAMA_DIR = llama.cpp-b$(LLAMA_BUILD)
+# Extra arguments handed to the llama.cpp make invocation.
+LLAMA_FLAGS = LLAMA_NO_K_QUANTS=1
+# Hugging Face repository and revision the source model files are fetched from.
+HF_REPO = openlm-research/open_llama_3b
+HF_REF = main
+HF_FILES = pytorch_model.bin tokenizer.model config.json tokenizer_config.json
+# Per-target download base URL, consumed by the shared download rule.
+$(HF_FILES): SITE = https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
+$(LLAMA_TAR): SITE = https://github.com/ggerganov/llama.cpp/archive/refs/tags
+# Output formats to produce; each entry yields one .gguf file.
+QUANTS = f16 q4_0 q4_1 q5_0 q5_1 q8_0
+# Everything that has to be downloaded.
+FILES = $(HF_FILES) $(LLAMA_TAR)
+MODEL_FILES = $(addsuffix .gguf,$(addprefix $(MODEL_NAME)-,$(QUANTS)))
+# Default goal: build every model file plus the combined checksum list.
+.PHONY: all
+all: $(MODEL_FILES) SHA256SUMS
+# Delete a target whose recipe failed, so a truncated download or a
+# half-written checksum file is never mistaken for an up-to-date result.
+.DELETE_ON_ERROR:
+# Download one source file; SITE is the target-specific base URL assigned
+# to the Hugging Face files and to the llama.cpp tarball.
+# --fail makes curl exit non-zero on an HTTP error instead of saving the
+# server's error page under the target name.
+$(FILES):
+	curl --fail -L -o $@ --url $(SITE)/$@
+# Unpack the llama.cpp source tarball. The tarball is an order-only
+# prerequisite: it must exist, but its timestamp never forces a re-extract.
+$(LLAMA_DIR): | $(LLAMA_TAR)
+	tar -xf $|
+# Build the quantize tool with a sub-make inside the extracted source tree.
+$(LLAMA_DIR)/quantize: | $(LLAMA_DIR)
+	$(MAKE) -C $(@D) $(LLAMA_FLAGS) $(@F)
+# Create the Python virtualenv and install the conversion dependencies.
+# Both pip commands read files inside $(LLAMA_DIR), so the extracted tree
+# is required first (order-only: it must exist, its timestamp is ignored).
+# Without this, `make -j` or a direct `make venv` can run pip before the
+# tarball has been unpacked.
+venv: | $(LLAMA_DIR)
+	$(PYTHON) -m venv venv
+	venv/bin/pip install -e $(LLAMA_DIR)/gguf-py
+	venv/bin/pip install -r $(LLAMA_DIR)/requirements.txt
+# Convert the downloaded model in the current directory to an f16 GGUF file.
+# The downloaded files are normal prerequisites; the llama.cpp tree and the
+# virtualenv only need to exist.
+$(MODEL_NAME)-f16.gguf: $(HF_FILES)
+$(MODEL_NAME)-f16.gguf: | $(LLAMA_DIR) venv
+	venv/bin/python $(LLAMA_DIR)/convert.py --outtype f16 --outfile $@ .
+# Quantize the f16 model. The pattern stem (e.g. 4_0) prefixed with "q"
+# is the type name passed to the quantize tool.
+# Prerequisites in order: the f16 model, then the quantize binary.
+$(MODEL_NAME)-q%.gguf: $(MODEL_NAME)-f16.gguf $(LLAMA_DIR)/quantize
+	$(lastword $^) $(firstword $^) $@ q$*
+# Write the SHA-256 checksum line of a file to <file>.sha.
+%.sha: %
+	sha256sum $^ > $@
+# Concatenate the per-model checksum files into one list.
+SHA256SUMS: $(MODEL_FILES:%=%.sha)
+	cat $^ > $@

README.md CHANGED Viewed

@@ -1,3 +1,20 @@
 ---
 license: apache-2.0
 ---

 ---
 license: apache-2.0
 ---
+# GGUF versions of OpenLLaMA 3B
+- Version: 1T tokens final version
+- Project: [OpenLLaMA: An Open Reproduction of LLaMA](https://github.com/openlm-research/open_llama)
+- Model: [openlm-research/open_llama_3b](https://huggingface.co/openlm-research/open_llama_3b)
+- [llama.cpp](https://github.com/ggerganov/llama.cpp): build 1012 (6381d4e) or later
+- [ggml version](https://huggingface.co/SlyEcho/open_llama_3b_ggml)
+## Newer quantizations
+llama.cpp now offers more quantization types, some of them below 4 bits.
+These are currently not supported for this model, possibly because some weight tensors have dimensions that are not divisible by 256.
+## Perplexity on wiki.test.406
+Coming soon...

SHA256SUMS ADDED Viewed

+589e21251921f052d542a112d7a766056ff2f2ed6d1951677890cd5313a9eaec  open-llama-3b-f16.gguf
+9a496b3352a7153be348279eb9205901524b71659e1b8700217d61b24af16b68  open-llama-3b-q4_0.gguf
+cd3801fc09621097494220333182552847963ff216b3b8c91d3cb5528325f889  open-llama-3b-q4_1.gguf
+eebd692cbaca097d7ea3dcc460ebdeb7c762598004bdc31c663b0d77e0a01d9c  open-llama-3b-q5_0.gguf
+f0ef8bd8d3ef4fde9e8ac5610763893635e9d6c26c9b26df27b68012fb4c7424  open-llama-3b-q5_1.gguf
+c48a108d99a5f07dcfbe7c10a40ee0f0663fedf1b171a49f254fb8d767258c19  open-llama-3b-q8_0.gguf

open-llama-3b-f16.gguf ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:589e21251921f052d542a112d7a766056ff2f2ed6d1951677890cd5313a9eaec
+size 6854058688

open-llama-3b-q4_0.gguf ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:9a496b3352a7153be348279eb9205901524b71659e1b8700217d61b24af16b68
+size 1928746720

open-llama-3b-q4_1.gguf ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:cd3801fc09621097494220333182552847963ff216b3b8c91d3cb5528325f889
+size 2142890720

open-llama-3b-q5_0.gguf ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:eebd692cbaca097d7ea3dcc460ebdeb7c762598004bdc31c663b0d77e0a01d9c
+size 2357034720

open-llama-3b-q5_1.gguf ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0ef8bd8d3ef4fde9e8ac5610763893635e9d6c26c9b26df27b68012fb4c7424
+size 2571178720

open-llama-3b-q8_0.gguf ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:c48a108d99a5f07dcfbe7c10a40ee0f0663fedf1b171a49f254fb8d767258c19
+size 3641898720