#!/bin/bash

echo "Starting setup process for auto-gguf-quant..."

# Make sure we have a clean environment
if [ -d "llama.cpp" ]; then
    echo "Found existing llama.cpp directory, removing it for a fresh start..."
    rm -rf llama.cpp
fi

# Clone the latest llama.cpp
echo "Cloning llama.cpp repository..."
git clone https://github.com/ggerganov/llama.cpp

# Copy calibration data to llama.cpp
echo "Copying calibration data..."
if [ -f "groups_merged.txt" ]; then
    cp groups_merged.txt llama.cpp/
else
    echo "Warning: groups_merged.txt not found in current directory."
fi

# Build the tools
echo "Building llama.cpp tools..."
cd llama.cpp

# List all the files to make sure the converter is there
echo "Listing files in llama.cpp directory:"
ls -la

# Check for the existence of the converter script
echo "Checking for converter script..."
if [ -f "convert_hf_to_gguf.py" ]; then
    echo "Found converter script: convert_hf_to_gguf.py"
elif [ -f "convert-hf-to-gguf.py" ]; then
    echo "Found converter script: convert-hf-to-gguf.py"
else
    echo "Warning: Converter script not found in the expected locations."
    echo "Searching for any converter script..."
    find . -name "convert*.py" | grep -i hf
fi

# Build with CMake.
# CUDA is disabled because the HF Spaces free tier has no GPU; AVX/AVX2 are
# enabled for CPU performance. These are CMake options, so they are passed
# as -D flags (exporting them as environment variables has no effect on the
# CMake build).
echo "Building with CMake..."
mkdir -p build
cmake -B build \
    -DBUILD_SHARED_LIBS=OFF \
    -DGGML_CUDA=OFF \
    -DGGML_AVX=ON \
    -DGGML_AVX2=ON
cmake --build build --config Release -j --target llama-quantize llama-gguf-split llama-imatrix

echo "Copying built binaries..."
# First try the bin directory (newer llama.cpp versions)...
if [ -d "build/bin" ]; then
    cp build/bin/llama-* ./ 2>/dev/null
    echo "Copied from build/bin/"
    ls -la ./llama-*
else
    # ...otherwise fall back to the build directory itself (older versions)
    cp build/llama-* ./ 2>/dev/null
    echo "Copied from build/"
    ls -la ./llama-*
fi

# Clean up the build directory to save space
echo "Cleaning up build directory..."
rm -rf build

cd ..

echo "Setup complete. Starting Gradio app..."
python app.py