fix
app.py CHANGED
@@ -12,12 +12,11 @@ os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 import gradio as gr

 from huggingface_hub import HfApi, list_repo_files, hf_hub_download, login, whoami
-from huggingface_hub.hf_api import CommitInfo # This is the correct import
 from apscheduler.schedulers.background import BackgroundScheduler

 # MODEL_REPO to monitor
 SOURCE_MODEL_REPO = "Sculptor-AI/Ursa_Minor"
-CONVERSION_SCRIPT = "./llama.cpp/
+CONVERSION_SCRIPT = "./llama.cpp/convert-hf-to-gguf.py"  # Updated script path
 STATUS_FILE = "status.json"

 # Quantization configurations in order of processing
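Background for the path change above: llama.cpp at some point renamed its converter from convert-hf-to-gguf.py to convert_hf_to_gguf.py (underscores), which is why the rest of this commit adds fallback searches for both spellings. A minimal resolution sketch in the same spirit; the helper name is illustrative and not part of app.py:

import os

def find_converter(repo_dir: str = "llama.cpp") -> str | None:
    """Illustrative helper: return whichever converter spelling this checkout ships."""
    for name in ("convert-hf-to-gguf.py", "convert_hf_to_gguf.py"):
        path = os.path.join(repo_dir, name)
        if os.path.isfile(path):
            return path
    return None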
@@ -174,6 +173,79 @@ def check_for_updates():
     save_status()
     return False

+def check_llama_cpp():
+    """Check if llama.cpp is properly set up and build if needed"""
+    try:
+        if not os.path.exists("llama.cpp"):
+            log_message("llama.cpp directory not found, cloning repository...")
+            subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp"], check=True)
+
+        # Check for critical files
+        converter_path = os.path.join("llama.cpp", "convert-hf-to-gguf.py")
+        if not os.path.exists(converter_path):
+            # Try alternative path
+            old_converter_path = os.path.join("llama.cpp", "convert_hf_to_gguf.py")
+            if os.path.exists(old_converter_path):
+                log_message(f"Found converter at {old_converter_path}, using this path")
+                global CONVERSION_SCRIPT
+                CONVERSION_SCRIPT = old_converter_path
+            else:
+                log_message("Converter script not found, listing files in llama.cpp...")
+                files = os.listdir("llama.cpp")
+                log_message(f"Files in llama.cpp: {files}")
+
+                # Search for any converter script
+                for file in files:
+                    if file.startswith("convert") and file.endswith(".py"):
+                        log_message(f"Found alternative converter: {file}")
+                        CONVERSION_SCRIPT = os.path.join("llama.cpp", file)
+                        break
+
+        # Build the tools
+        log_message("Building llama.cpp tools...")
+        os.chdir("llama.cpp")
+
+        # Check if build directory exists
+        if not os.path.exists("build"):
+            os.makedirs("build")
+
+        # Configure and build
+        subprocess.run(["cmake", "-B", "build", "-DBUILD_SHARED_LIBS=OFF"], check=True)
+        subprocess.run(["cmake", "--build", "build", "--config", "Release", "-j", "--target", "llama-quantize", "llama-gguf-split", "llama-imatrix"], check=True)
+
+        # Copy binaries
+        log_message("Copying built binaries...")
+        try:
+            # Different builds may put binaries in different places
+            if os.path.exists(os.path.join("build", "bin")):
+                for binary in ["llama-quantize", "llama-gguf-split", "llama-imatrix"]:
+                    src = os.path.join("build", "bin", binary)
+                    if os.path.exists(src):
+                        subprocess.run(["cp", src, "./"], check=True)
+            else:
+                for binary in ["llama-quantize", "llama-gguf-split", "llama-imatrix"]:
+                    src = os.path.join("build", binary)
+                    if os.path.exists(src):
+                        subprocess.run(["cp", src, "./"], check=True)
+        except Exception as e:
+            log_message(f"Error copying binaries: {str(e)}", error=True)
+
+        # Return to the original directory
+        os.chdir("..")
+
+        # Make sure we have the calibration data
+        if not os.path.exists(os.path.join("llama.cpp", "groups_merged.txt")):
+            log_message("Copying calibration data...")
+            if os.path.exists("groups_merged.txt"):
+                subprocess.run(["cp", "groups_merged.txt", "llama.cpp/"], check=True)
+
+        log_message("llama.cpp setup completed successfully")
+        return True
+    except Exception as e:
+        log_message(f"Error setting up llama.cpp: {str(e)}", error=True)
+        traceback.print_exc()
+        return False
+
 def process_model():
     """Process the model to create all quantized versions"""
     if processing_lock.locked():
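One caveat in the build section of check_llama_cpp above: os.chdir("..") is only reached on success, so if a cmake call raises, the except handler runs with the process still inside llama.cpp/. A self-restoring chdir is the usual guard; this is an illustrative sketch, not code from the commit:

import os
from contextlib import contextmanager

@contextmanager
def pushd(path):
    """Temporarily change the working directory, restoring it even if the body raises."""
    prev = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(prev)

# Usage sketch: run the cmake steps inside `with pushd("llama.cpp"): ...`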
@@ -182,6 +254,13 @@ def process_model():

     with processing_lock:
         try:
+            # Check llama.cpp is set up
+            if not check_llama_cpp():
+                log_message("Failed to set up llama.cpp, aborting", error=True)
+                current_status["status"] = "Error setting up llama.cpp"
+                save_status()
+                return
+
             # Validate authentication
             try:
                 user_info = whoami()
@@ -248,11 +327,26 @@ def process_model():
             fp16_path = str(outdir / f"{model_name}.fp16.gguf")
             log_message(f"Converting model to FP16: {fp16_path}")

+            # Check if the converter script exists
+            if not os.path.exists(CONVERSION_SCRIPT):
+                log_message(f"Converter script not found at {CONVERSION_SCRIPT}, searching for alternatives", error=True)
+                for root, dirs, files in os.walk("llama.cpp"):
+                    for file in files:
+                        if file.startswith("convert") and file.endswith(".py"):
+                            global CONVERSION_SCRIPT
+                            CONVERSION_SCRIPT = os.path.join(root, file)
+                            log_message(f"Found converter at {CONVERSION_SCRIPT}")
+                            break
+
+            log_message(f"Using converter script: {CONVERSION_SCRIPT}")
+
             result = subprocess.run([
                 "python", CONVERSION_SCRIPT, str(local_dir), "--outtype", "f16", "--outfile", fp16_path
             ], shell=False, capture_output=True, text=True)

             if result.returncode != 0:
+                log_message(f"Converter stderr: {result.stderr}")
+                log_message(f"Converter stdout: {result.stdout}")
                 raise Exception(f"Error converting to fp16: {result.stderr}")

             log_message("Model converted to fp16 successfully!")
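A note on the fallback just added: CPython raises "name ... is used prior to global declaration" when a global statement follows a use of the name in the same function body, and CONVERSION_SCRIPT is read by the `if not os.path.exists(...)` test a few lines earlier, so as written this block is likely to fail at import time. A common shape is a module-level resolver that declares the global before any use; the function name below is illustrative:

import os

CONVERSION_SCRIPT = "./llama.cpp/convert-hf-to-gguf.py"

def resolve_conversion_script() -> str:
    """Illustrative resolver: declare global before the name is ever read."""
    global CONVERSION_SCRIPT
    if not os.path.exists(CONVERSION_SCRIPT):
        for root, _dirs, files in os.walk("llama.cpp"):
            for name in files:
                if name.startswith("convert") and name.endswith(".py"):
                    CONVERSION_SCRIPT = os.path.join(root, name)
                    return CONVERSION_SCRIPT
    return CONVERSION_SCRIPT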
@@ -262,13 +356,22 @@ def process_model():
             train_data_path = "llama.cpp/groups_merged.txt" # Default calibration dataset

             if not os.path.isfile(train_data_path):
-                log_message(f"Warning: Training data file not found
-
-
+                log_message(f"Warning: Training data file not found at {train_data_path}, searching alternatives...")
+                # Try to find it elsewhere
+                if os.path.exists("groups_merged.txt"):
+                    train_data_path = "groups_merged.txt"
+                    log_message(f"Found training data at {train_data_path}")
+                else:
+                    log_message("Calibration data not found. Some quantizations may not work.", error=True)
+
+            try:
+                if os.path.isfile(train_data_path):
                     generate_importance_matrix(fp16_path, train_data_path, imatrix_path)
-
-                log_message(f"Error generating importance matrix: {str(e)}", error=True)
+                else:
                     imatrix_path = None
+            except Exception as e:
+                log_message(f"Error generating importance matrix: {str(e)}", error=True)
+                imatrix_path = None

             # Process each quantization type
             total_quants = len(QUANT_CONFIGS)
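generate_importance_matrix is defined elsewhere in app.py and does not appear in this diff; presumably it wraps the llama-imatrix binary built by check_llama_cpp. A rough sketch of such a wrapper follows; the binary path and the -m/-f/-o flags reflect common llama.cpp imatrix usage and should be treated as assumptions:

import subprocess

def run_imatrix(fp16_path: str, train_data_path: str, imatrix_path: str) -> None:
    """Sketch: compute an importance matrix for the imatrix-based quant types."""
    subprocess.run(
        ["./llama.cpp/llama-imatrix", "-m", fp16_path, "-f", train_data_path, "-o", imatrix_path],
        check=True,
    )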
@@ -474,7 +577,8 @@ def create_ui():
             check_button = gr.Button("Check for Updates", variant="primary")
             process_button = gr.Button("Force Processing", variant="secondary")

-
+            # Remove the 'label' parameter since it's not supported
+            progress_bar = gr.Progress()

         with gr.Tab("Quantization Status"):
             quant_status = gr.DataFrame(
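Context for the comment above: gr.Progress is not a layout component and takes no label; in Gradio it is injected as a default argument of an event handler and driven from inside it, so instantiating it in create_ui has no visible effect by itself. A minimal usage sketch; the handler and output names are illustrative:

import time
import gradio as gr

def run_processing(progress=gr.Progress()):
    """Illustrative handler: the progress tracker renders while this event runs."""
    steps = 5
    for i in range(steps):
        progress((i + 1) / steps, desc=f"Quantizing {i + 1}/{steps}")
        time.sleep(0.1)
    return "done"

# e.g. process_button.click(run_processing, outputs=status_textbox)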
@@ -540,6 +644,9 @@ def initialize():
     # Load status from file
     load_status()

+    # Check and setup llama.cpp
+    check_llama_cpp()
+
     # Schedule regular checks for updates
     scheduler = BackgroundScheduler()
     scheduler.add_job(check_and_process, 'interval', minutes=60) # Check every hour
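The lines above only register the hourly job; with APScheduler nothing runs until the scheduler's start() is called, which initialize() presumably does just past the visible context. A minimal sketch of the pattern:

from apscheduler.schedulers.background import BackgroundScheduler

def check_and_process():
    ...  # presumably wraps check_for_updates() / process_model() in app.py

scheduler = BackgroundScheduler()
scheduler.add_job(check_and_process, "interval", minutes=60)
scheduler.start()  # without start(), the hourly job never fires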
start.sh CHANGED
@@ -1,31 +1,75 @@
 #!/bin/bash

-
-
-
-
+echo "Starting setup process for auto-gguf-quant..."
+
+# Make sure we have a clean environment
+if [ -d "llama.cpp" ]; then
+    echo "Found existing llama.cpp directory, removing it for a fresh start..."
+    rm -rf llama.cpp
 fi

-#
-
-
-
+# Clone llama.cpp with specific version that works well
+echo "Cloning llama.cpp repository..."
+git clone https://github.com/ggerganov/llama.cpp
+
+# Copy calibration data to llama.cpp
+echo "Copying calibration data..."
+if [ -f "groups_merged.txt" ]; then
+    cp groups_merged.txt llama.cpp/
+else
+    echo "Warning: groups_merged.txt not found in current directory."
 fi

+# Build the tools
+echo "Building llama.cpp tools..."
+cd llama.cpp
+
 # Disable CUDA for HF spaces (not supported in free tier)
-# We should still build with optimizations for CPU
 export GGML_CUDA=OFF
+# Enable CPU optimizations
 export GGML_AVX=1
 export GGML_AVX2=1

-
-echo "
+# List all the files to make sure the converter is there
+echo "Listing files in llama.cpp directory:"
+ls -la
+
+# Check for the existence of the converter script
+echo "Checking for converter script..."
+if [ -f "convert_hf_to_gguf.py" ]; then
+    echo "Found converter script: convert_hf_to_gguf.py"
+elif [ -f "convert-hf-to-gguf.py" ]; then
+    echo "Found converter script: convert-hf-to-gguf.py"
+else
+    echo "Warning: Converter script not found in the expected locations."
+    echo "Searching for any converter script..."
+    find . -name "convert*.py" | grep -i hf
+fi
+
+# Build with CMake
+echo "Building with CMake..."
+mkdir -p build
 cmake -B build -DBUILD_SHARED_LIBS=OFF
 cmake --build build --config Release -j --target llama-quantize llama-gguf-split llama-imatrix
+
 echo "Copying built binaries..."
-
+# First try bin directory (newer llama.cpp versions)
+if [ -d "build/bin" ]; then
+    cp build/bin/llama-* ./ 2>/dev/null
+    echo "Copied from build/bin/"
+    ls -la ./llama-*
+else
+    # Try direct build directory (older versions)
+    cp build/llama-* ./ 2>/dev/null
+    echo "Copied from build/"
+    ls -la ./llama-*
+fi
+
+# Cleanup build directory to save space
+echo "Cleaning up build directory..."
 rm -rf build

 cd ..
-
+
+echo "Setup complete. Starting Gradio app..."
 python app.py
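The binary-copy step above mirrors the one inside check_llama_cpp in app.py. For reference, a compact Python equivalent using only the standard library; the directory arguments are assumptions matching the script's layout:

import glob
import os
import shutil

def copy_llama_binaries(build_dir: str = "llama.cpp/build", dest: str = "llama.cpp") -> list:
    """Copy the built llama-* tools from build/bin (newer trees) or build/ (older trees)."""
    bin_dir = os.path.join(build_dir, "bin")
    src_dir = bin_dir if os.path.isdir(bin_dir) else build_dir
    copied = []
    for path in glob.glob(os.path.join(src_dir, "llama-*")):
        shutil.copy2(path, dest)
        copied.append(path)
    return copied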