#!/usr/bin/env bash
#
# Benchmark TriLM models with llama.cpp.
#
# The script downloads the base TriLM GGUF files, builds llama-bench and
# llama-quantize, produces one model file per quantization type, and then runs
# llama-bench on each of them. When nvidia-smi is found, llama.cpp is rebuilt
# with CUDA enabled and a second round of benchmarks is appended to the same
# results file.

# -e: stop on the first failing command, -u: unset variables are errors,
# -x: trace every command (on stderr).
set -eux

# Every path below is relative, so work from the directory holding this script.
cd -- "$(dirname -- "$0")"

# Directory that holds the downloaded models and the quantized variants.
MODEL_DIR="bench-TriLMs-models"
# Root of the llama.cpp source tree (relative to this script's directory).
LLAMA_CPP_PATH="."
# Size tags as they appear in the model file names (TriLM_<size>B_Unpacked-*).
sizes=("1.5" "2.4" "3.9")
# Quantization types that get generated, and that the CPU benchmark covers.
types=("TQ1_0" "TQ2_0" "Q4_K_M" "Q8_0" "F16" "BF16")
# Quantization types covered by the GPU benchmark.
gputypes=("TQ2_0" "Q4_K_M" "Q8_0" "F16")
|
|
|
# Make sure the base TQ1_0-F16 model for every entry of `sizes` is present in
# MODEL_DIR, downloading the missing ones with wget.
#
# Globals:   MODEL_DIR (read), sizes (read)
# Outputs:   progress message on stdout; model files inside MODEL_DIR
# Returns:   non-zero when a download (or the final rename) fails
function gather_models() {
  echo Gather the models

  # mkdir -p is a no-op when the directory already exists.
  mkdir -p -- "$MODEL_DIR"

  # Subshell: the directory change and the loop variables stay local to it.
  (
    cd -- "$MODEL_DIR" || exit

    for sz in "${sizes[@]}"; do
      filename="TriLM_${sz}B_Unpacked-TQ1_0-F16.gguf"
      if [ ! -f "$filename" ]; then
        # Download under a temporary name and rename only on success, so an
        # interrupted transfer is never mistaken for a complete model by the
        # existence check above on the next run.
        wget -O "${filename}.part" "https://huggingface.co/compilade/quant-tests/resolve/main/${filename}" || exit
        mv -- "${filename}.part" "$filename" || exit
      fi
    done
  )
}
|
|
|
# Configure and build llama-bench and llama-quantize from scratch in
# "$LLAMA_CPP_PATH"/build.
#
# Globals:   LLAMA_CPP_PATH (read)
# Arguments: $@ - extra options forwarded verbatim to cmake
# Outputs:   progress messages and build output on stdout/stderr
# Returns:   non-zero when any step of the build fails
function build_llama_cpp() {
  # Report the options: this function is used for both the CPU and CUDA builds.
  echo "Build llama.cpp with cmake options: $*"

  # Subshell: the directory changes do not leak into the caller.
  (
    cd -- "$LLAMA_CPP_PATH" || exit

    # Always start from a clean build directory so options from a previous
    # configuration cannot carry over.
    if [ -d build ]; then
      pwd # show which tree's build directory is about to be removed
      rm -rf build
    fi

    mkdir build
    cd build || exit

    cmake .. "$@" || exit
    # Bound the parallelism: a bare `make -j` puts no limit on the number of
    # simultaneous jobs.
    make -j "$(nproc)" llama-bench llama-quantize
  )
}
|
|
|
# Create, for every size in `sizes`, one model file per entry of `types` by
# running llama-quantize on the TQ1_0-F16 base model. Files that already exist
# are left untouched.
#
# Globals:   sizes, types, MODEL_DIR, LLAMA_CPP_PATH (read)
# Outputs:   progress message on stdout; quantized models inside MODEL_DIR
# Returns:   non-zero when llama-quantize (or the final rename) fails
function quantize() {
  echo "Make all model types we'll test"

  local sz ty src dst
  for sz in "${sizes[@]}"; do
    src="${MODEL_DIR}/TriLM_${sz}B_Unpacked-TQ1_0-F16.gguf"
    for ty in "${types[@]}"; do
      dst="${MODEL_DIR}/TriLM_${sz}B_Unpacked-${ty}.gguf"
      if [ ! -f "$dst" ]; then
        # Write to a temporary name and rename only on success, so a failed or
        # interrupted run never leaves a truncated file that the existence
        # check above would skip next time.
        "$LLAMA_CPP_PATH"/build/bin/llama-quantize --allow-requantize "$src" "${dst}.part" "$ty" || return
        mv -- "${dst}.part" "$dst" || return
      fi
    done
  done
}
|
|
|
# Run llama-bench on every size in `sizes` for each quantization type given as
# an argument, once for each thread count in 1, 2, 4 and 8.
#
# Globals:   sizes, MODEL_DIR, LLAMA_CPP_PATH (read)
# Arguments: $@ - quantization type names, as used in the model file names
# Outputs:   stdout - the `-o json` output of each llama-bench run, each one
#                     followed by a line holding a single ","
#            stderr - progress message
function bench() {
  # Progress goes to stderr: the callers redirect stdout into the results
  # file, which must only receive benchmark output.
  echo Test each model one by one for different numbers of threads >&2

  local sz ty th
  for sz in "${sizes[@]}"; do
    for ty in "$@"; do
      for th in 1 2 4 8; do
        "$LLAMA_CPP_PATH"/build/bin/llama-bench -v -m "${MODEL_DIR}/TriLM_${sz}B_Unpacked-${ty}.gguf" -t "${th}" -p 512 -n 128 -r 4 -o json
        # Separator between the outputs of consecutive runs.
        printf "%s\n" ","
      done
    done
  done
}
|
|
|
# Benchmark every quantization type listed in `types` and append the output of
# bench to the given file.
#
# Globals:   types (read)
# Arguments: $1 - path of the results file (appended to, created if missing)
function bench_cpu() {
  # Fail with an explicit message when the results file path is missing.
  local out_file="${1:?bench_cpu: missing results file path}"
  bench "${types[@]}" >> "$out_file"
}
|
|
|
# Benchmark every quantization type listed in `gputypes` and append the output
# of bench to the given file.
#
# Globals:   gputypes (read)
# Arguments: $1 - path of the results file (appended to, created if missing)
function bench_gpu() {
  # Fail with an explicit message when the results file path is missing.
  local out_file="${1:?bench_gpu: missing results file path}"
  bench "${gputypes[@]}" >> "$out_file"
}
|
|
|
# Output files are named after the current Unix timestamp, so successive runs
# never overwrite each other.
currentTime="$(date +'%s')"
resultFile="results-${currentTime}.json"
infoFile="results-${currentTime}-info.txt"

# The info file starts with the CPU description of the machine.
lscpu > "$infoFile"

gather_models
build_llama_cpp -DGGML_NATIVE=ON -DGGML_CPU=ON
quantize

# Below a "---" separator, record the listing of the model directory.
echo "---" >> "$infoFile"
ls -go "$MODEL_DIR" >> "$infoFile"

bench_cpu "$resultFile"

# nvidia-smi being available as an executable is taken as the sign that a GPU
# is present; in that case rebuild with CUDA and append the GPU results to the
# same results file.
if [ -x "$(command -v nvidia-smi)" ]; then
  echo GPU detected, benchmark with that too.
  build_llama_cpp -DGGML_NATIVE=ON -DGGML_CUDA=ON -DGGML_CUDA_F16=ON
  bench_gpu "$resultFile"
fi
|
|
|
|