#!/bin/bash

echo "Starting setup process for auto-gguf-quant..."

# Make sure we have a clean environment
if [ -d "llama.cpp" ]; then
  echo "Found existing llama.cpp directory, removing it for a fresh start..."
  rm -rf llama.cpp
fi

# Clone the llama.cpp repository
echo "Cloning llama.cpp repository..."
git clone https://github.com/ggerganov/llama.cpp || { echo "Error: failed to clone llama.cpp"; exit 1; }

# Copy calibration data to llama.cpp
echo "Copying calibration data..."
if [ -f "groups_merged.txt" ]; then
  cp groups_merged.txt llama.cpp/
else
  echo "Warning: groups_merged.txt not found in current directory."
fi
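
# Note: groups_merged.txt is the calibration text later fed to llama-imatrix for
# importance-matrix (imatrix) generation; the quantization pipeline itself is
# presumably driven by app.py (assumption, not verified from this script alone).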

# Build the tools
echo "Building llama.cpp tools..."
cd llama.cpp || exit 1

# Build flags: disable CUDA (GPUs are not available in the HF Spaces free tier)
# and enable CPU optimizations. These must be passed to CMake as -D options below;
# plain environment variables are not picked up by the CMake build.
CMAKE_FLAGS="-DGGML_CUDA=OFF -DGGML_AVX=ON -DGGML_AVX2=ON"

# List all the files to make sure the converter is there
echo "Listing files in llama.cpp directory:"
ls -la

# Check for the existence of the converter script
echo "Checking for converter script..."
if [ -f "convert_hf_to_gguf.py" ]; then
  echo "Found converter script: convert_hf_to_gguf.py"
elif [ -f "convert-hf-to-gguf.py" ]; then
  echo "Found converter script: convert-hf-to-gguf.py"
else
  echo "Warning: Converter script not found in the expected locations."
  echo "Searching for any converter script..."
  find . -name "convert*.py" | grep -i hf
fi
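
# Illustrative converter usage (example only: the actual invocation is expected to
# come from app.py, and the model path / output name below are placeholders):
#   python convert_hf_to_gguf.py /path/to/hf-model --outfile model-f16.gguf --outtype f16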

# Build with CMake
echo "Building with CMake..."
mkdir -p build
cmake -B build -DBUILD_SHARED_LIBS=OFF $CMAKE_FLAGS
cmake --build build --config Release -j --target llama-quantize llama-gguf-split llama-imatrix

echo "Copying built binaries..."
# First try bin directory (newer llama.cpp versions)
if [ -d "build/bin" ]; then
  cp build/bin/llama-* ./ 2>/dev/null
  echo "Copied from build/bin/"
  ls -la ./llama-*
else 
  # Try direct build directory (older versions)
  cp build/llama-* ./ 2>/dev/null
  echo "Copied from build/"
  ls -la ./llama-*
fi
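
# Illustrative end-to-end quantization flow with the binaries copied above
# (file names are placeholders; the real pipeline is assumed to live in app.py):
#   ./llama-imatrix -m model-f16.gguf -f groups_merged.txt -o imatrix.dat
#   ./llama-quantize --imatrix imatrix.dat model-f16.gguf model-Q4_K_M.gguf Q4_K_M
#   ./llama-gguf-split --split --split-max-size 2G model-Q4_K_M.gguf model-Q4_K_M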

# Cleanup build directory to save space
echo "Cleaning up build directory..."
rm -rf build

cd ..

echo "Setup complete. Starting Gradio app..."
python app.py