# quant-tests / bench-TriLMs.sh
# Source: https://huggingface.co/compilade/quant-tests (author: compilade)
# Commit 7b12d2f — "Add a Python-based benchmarking script" (2.22 kB)
#!/usr/bin/env bash
# Benchmark TriLM models across several quantization types with llama.cpp.
# Emits results-<timestamp>.json (llama-bench JSON fragments) and
# results-<timestamp>-info.txt (host/CPU and model-size info).
# FIX: added pipefail so a failing stage inside a pipeline aborts the run.
set -eux -o pipefail

# Operate relative to the script's own directory.
cd "$(dirname "$0")"

MODEL_DIR="bench-TriLMs-models"
LLAMA_CPP_PATH="."

# Model sizes in billions of parameters, and the quant types to benchmark.
sizes=("1.5" "2.4" "3.9")
types=("TQ1_0" "TQ2_0" "Q4_K_M" "Q8_0" "F16" "BF16")
# Subset of types benchmarked on GPU (TQ1_0/BF16 excluded).
gputypes=("TQ2_0" "Q4_K_M" "Q8_0" "F16")
# Download the F16 source GGUF for each model size from Hugging Face,
# skipping any file that is already present in MODEL_DIR.
# Globals: MODEL_DIR (read), sizes (read)
function gather_models() {
    echo Gather the models
    if [ ! -d "$MODEL_DIR" ]; then
        mkdir -p -- "$MODEL_DIR"
    fi
    (
        cd "$MODEL_DIR"
        for sz in "${sizes[@]}"; do
            filename="TriLM_${sz}B_Unpacked-TQ1_0-F16.gguf"
            if [ ! -f "$filename" ]; then
                # FIX: URL previously ended in the garbled "$(unknown)";
                # it must reference the actual model filename.
                wget "https://huggingface.co/compilade/quant-tests/resolve/main/${filename}"
            fi
        done
    )
}
# Configure and build the llama-bench and llama-quantize targets from a
# clean build tree. Any arguments are forwarded verbatim to cmake.
# Globals: LLAMA_CPP_PATH (read)
function build_llama_cpp() {
    echo Build llama.cpp for CPU
    (
        cd -- "$LLAMA_CPP_PATH"
        # Wipe any previous build tree so configure flags never go stale.
        if test -d build; then
            pwd
            rm -rf build
        fi
        mkdir build
        cd build
        cmake .. "$@"
        make -j llama-bench llama-quantize
    )
}
# Produce every (size, type) quantized model from its F16 source,
# skipping combinations whose output file already exists.
# Globals: MODEL_DIR, LLAMA_CPP_PATH, sizes, types (all read)
function quantize() {
    echo "Make all model types we'll test"
    (
        for sz in "${sizes[@]}"; do
            for ty in "${types[@]}"; do
                src="${MODEL_DIR}/TriLM_${sz}B_Unpacked-TQ1_0-F16.gguf"
                dst="${MODEL_DIR}/TriLM_${sz}B_Unpacked-${ty}.gguf"
                if [ ! -f "$dst" ]; then
                    "$LLAMA_CPP_PATH"/build/bin/llama-quantize --allow-requantize "$src" "$dst" "$ty"
                fi
            done
        done
    )
}
# Run llama-bench on every (size, type, thread-count) combination, writing
# JSON to stdout with a "," separator line after each run so the caller can
# assemble the fragments into one JSON document.
# Globals: MODEL_DIR, LLAMA_CPP_PATH, sizes (all read)
# Arguments: the quant types to benchmark.
function bench() {
    # FIX: the banner used to go to stdout and ended up inside the JSON
    # results file (callers redirect stdout with >>); send it to stderr.
    echo Test each model one by one for different numbers of threads >&2
    for sz in "${sizes[@]}"; do
        for ty in "$@"; do
            for th in 1 2 4 8; do
                "$LLAMA_CPP_PATH"/build/bin/llama-bench -v -m "${MODEL_DIR}/TriLM_${sz}B_Unpacked-${ty}.gguf" -t "${th}" -p 512 -n 128 -r 4 -o json
                # Separator so successive JSON outputs can be joined later.
                printf "%s\n" ","
            done
        done
    done
}
# Benchmark every CPU quant type, appending the output to the file in $1.
# Globals: types (read)
function bench_cpu() {
    local outfile=$1
    bench "${types[@]}" >> "$outfile"
}
# Benchmark every GPU quant type, appending the output to the file in $1.
# Globals: gputypes (read)
function bench_gpu() {
    local outfile=$1
    bench "${gputypes[@]}" >> "$outfile"
}
# --- Main flow ---------------------------------------------------------------
currentTime="$(date +'%s')"
resultFile="results-${currentTime}.json"
infoFile="results-${currentTime}-info.txt"

# Record host CPU details alongside the benchmark results.
lscpu > "$infoFile"

gather_models
build_llama_cpp -DGGML_NATIVE=ON -DGGML_CPU=ON
quantize

# Append the on-disk model sizes to the info file.
echo "---" >> "$infoFile"
ls -go "$MODEL_DIR" >> "$infoFile"

bench_cpu "$resultFile"

# Re-build with CUDA and benchmark again when an NVIDIA GPU is available.
if [ -x "$(command -v nvidia-smi)" ]; then
    # FIX: typo "benchark" -> "benchmark" in the user-facing message.
    echo "GPU detected, benchmark with that too."
    build_llama_cpp -DGGML_NATIVE=ON -DGGML_CUDA=ON -DGGML_CUDA_F16=ON
    bench_gpu "$resultFile"
fi