|
# Base name of the generated model files (<MODEL_NAME>-<quant>.bin).
MODEL_NAME = open-llama-3b

# Interpreter used to run convert.py; only assigned when not already set.
PYTHON ?= python

# llama.cpp release identifier. The tarball name and the directory it unpacks
# to are both derived from it.
LLAMA_TAG = 5c64a09
LLAMA_TAR = master-$(LLAMA_TAG).tar.gz
LLAMA_DIR = llama.cpp-master-$(LLAMA_TAG)

# Hugging Face repository and revision used to build the download URL.
HF_REPO = openlm-research/open_llama_3b
HF_REF  = main
|
|
|
# Download sources. Every name in DISTS has a SITES.<name> base URL and a
# FILES.<name> list of file names that are fetched from under that URL.
DISTS = gh hf

# gh: the llama.cpp source tarball, taken from GitHub's tag archives.
SITES.gh = https://github.com/ggerganov/llama.cpp/archive/refs/tags
FILES.gh = $(LLAMA_TAR)

# hf: files resolved from the Hugging Face repository at revision HF_REF.
SITES.hf = https://huggingface.co/$(HF_REPO)/resolve/$(HF_REF)
FILES.hf = pytorch_model.bin tokenizer.model
|
|
|
# Output formats to build; each entry yields one <MODEL_NAME>-<quant>.bin file.
QUANTS = f16 q4_0 q4_1 q5_0 q5_1 q8_0
MODEL_FILES = $(patsubst %,$(MODEL_NAME)-%.bin,$(QUANTS))
|
|
|
# Delete a target whose recipe exits non-zero. Several recipes in this file
# write their target through a `>` redirection or an -o option, which creates
# the file even when the command fails; without this a truncated file would be
# treated as up to date on the next run.
.DELETE_ON_ERROR:

# Aggregate target: every quantized/converted model file plus the combined
# checksum list. Declared phony because no file named `all` is produced.
.PHONY: all
all: $(MODEL_FILES) SHA256SUMS
|
|
|
# Every file that has to be downloaded, across all entries of DISTS.
DISTFILES = $(foreach d,$(DISTS),$(FILES.$(d)))

# Fetch all distribution files with one curl invocation. The grouped target
# (&:) tells Make that a single run of the recipe produces the whole list.
#   --fail        exit non-zero on an HTTP error instead of saving the
#                 server's error page under the target name
#   --fail-early  stop at the first failed transfer; otherwise curl's exit
#                 status only reflects the last URL on the command line
#   --location    follow redirects
#   -o F -z F     write to F, requesting the transfer only if the remote copy
#                 is newer than an existing local F
# NOTE(review): curl's manual says a repeated option keeps its last value, so
# the per-file -z may effectively apply only the last file's timestamp --
# confirm, and separate the transfers with --next if that matters.
$(DISTFILES) &:
	curl --fail --fail-early --location $(foreach d,$(DISTS),$(foreach f,$(FILES.$(d)),-o $(f) -z $(f) --url $(SITES.$(d))/$(f)))
|
|
|
# Unpack the llama.cpp sources. The tarball is an order-only prerequisite: it
# has to exist, but its timestamp never forces a re-extraction. $| expands to
# that order-only prerequisite, i.e. the tarball.
$(LLAMA_DIR): | $(LLAMA_TAR)
	tar -xf $|
|
|
|
# Build the quantize tool by running a sub-make inside the unpacked source
# tree. $(@D) is the directory part of the target and $(@F) its file part, so
# this asks that tree's build for its `quantize` goal. The source tree is
# order-only: it only has to exist.
$(LLAMA_DIR)/quantize: | $(LLAMA_DIR)
	$(MAKE) -C $(@D) $(@F)
|
|
|
# Produce a patched copy of llama.cpp's convert.py in the current directory.
#   $<  convert.py.diff, the patch to apply (-i)
#   $@  convert.py, the patched result (-o); the copy under $(LLAMA_DIR) is
#       the input and is not modified
#   -u  treat the patch as a unified diff
# Do not bundle the flag as `-ru`: patch's -r takes a reject-file argument, so
# that spelling means "write rejects to a file named u" and never sets -u.
convert.py: convert.py.diff | $(LLAMA_DIR)
	patch -u $(LLAMA_DIR)/convert.py -i $< -o $@
|
|
|
# Convert the downloaded Hugging Face files into the f16 model using the
# patched convert.py. The trailing `.` hands the current directory to the
# script as its positional argument.
$(MODEL_NAME)-f16.bin: $(FILES.hf) convert.py
	$(PYTHON) convert.py \
		--outtype f16 \
		--outfile $@ \
		.
|
|
|
# Quantize the f16 model into one of the q* variants.
#   $<   the f16 model (input)
#   $@   the quantized model (output)
#   $*   the stem matched by %, i.e. the type name without its leading "q"
#        (for example 4_0); the recipe puts the "q" back for the tool
# The quantize binary is order-only: it must exist, but rebuilding it does not
# by itself invalidate existing outputs.
$(MODEL_NAME)-q%.bin: $(MODEL_NAME)-f16.bin | $(LLAMA_DIR)/quantize
	$(LLAMA_DIR)/quantize $< $@ q$*
|
|
|
# Generic rule: store the sha256sum output for any file in <file>.sha.
%.sha: %
	sha256sum $< >$@
|
|
|
# Combined checksum list: the per-model .sha files concatenated in
# MODEL_FILES order.
SHA256SUMS: $(patsubst %,%.sha,$(MODEL_FILES))
	cat $^ > $@
|
|