fix
app.py CHANGED
@@ -12,12 +12,11 @@ os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 import gradio as gr

 from huggingface_hub import HfApi, list_repo_files, hf_hub_download, login, whoami
-from huggingface_hub.hf_api import CommitInfo # This is the correct import
 from apscheduler.schedulers.background import BackgroundScheduler

 # MODEL_REPO to monitor
 SOURCE_MODEL_REPO = "Sculptor-AI/Ursa_Minor"
-CONVERSION_SCRIPT = "./llama.cpp/
+CONVERSION_SCRIPT = "./llama.cpp/convert-hf-to-gguf.py"  # Updated script path
 STATUS_FILE = "status.json"

 # Quantization configurations in order of processing
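Background for the path change above: llama.cpp at some point renamed its converter from convert-hf-to-gguf.py to convert_hf_to_gguf.py (underscores), which is why the rest of this commit adds fallback searches for both spellings. A minimal resolution sketch in the same spirit; the helper name is illustrative and not part of app.py:

import os

def find_converter(repo_dir: str = "llama.cpp") -> str | None:
    """Illustrative helper: return whichever converter spelling this checkout ships."""
    for name in ("convert-hf-to-gguf.py", "convert_hf_to_gguf.py"):
        path = os.path.join(repo_dir, name)
        if os.path.isfile(path):
            return path
    return None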
@@ -174,6 +173,79 @@ def check_for_updates():
     save_status()
     return False

+def check_llama_cpp():
+    """Check if llama.cpp is properly set up and build if needed"""
+    try:
+        if not os.path.exists("llama.cpp"):
+            log_message("llama.cpp directory not found, cloning repository...")
+            subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp"], check=True)
+
+        # Check for critical files
+        converter_path = os.path.join("llama.cpp", "convert-hf-to-gguf.py")
+        if not os.path.exists(converter_path):
+            # Try alternative path
+            old_converter_path = os.path.join("llama.cpp", "convert_hf_to_gguf.py")
+            if os.path.exists(old_converter_path):
+                log_message(f"Found converter at {old_converter_path}, using this path")
+                global CONVERSION_SCRIPT
+                CONVERSION_SCRIPT = old_converter_path
+            else:
+                log_message("Converter script not found, listing files in llama.cpp...")
+                files = os.listdir("llama.cpp")
+                log_message(f"Files in llama.cpp: {files}")
+
+                # Search for any converter script
+                for file in files:
+                    if file.startswith("convert") and file.endswith(".py"):
+                        log_message(f"Found alternative converter: {file}")
+                        CONVERSION_SCRIPT = os.path.join("llama.cpp", file)
+                        break
+
+        # Build the tools
+        log_message("Building llama.cpp tools...")
+        os.chdir("llama.cpp")
+
+        # Check if build directory exists
+        if not os.path.exists("build"):
+            os.makedirs("build")
+
+        # Configure and build
+        subprocess.run(["cmake", "-B", "build", "-DBUILD_SHARED_LIBS=OFF"], check=True)
+        subprocess.run(["cmake", "--build", "build", "--config", "Release", "-j", "--target", "llama-quantize", "llama-gguf-split", "llama-imatrix"], check=True)
+
+        # Copy binaries
+        log_message("Copying built binaries...")
+        try:
+            # Different builds may put binaries in different places
+            if os.path.exists(os.path.join("build", "bin")):
+                for binary in ["llama-quantize", "llama-gguf-split", "llama-imatrix"]:
+                    src = os.path.join("build", "bin", binary)
+                    if os.path.exists(src):
+                        subprocess.run(["cp", src, "./"], check=True)
+            else:
+                for binary in ["llama-quantize", "llama-gguf-split", "llama-imatrix"]:
+                    src = os.path.join("build", binary)
+                    if os.path.exists(src):
+                        subprocess.run(["cp", src, "./"], check=True)
+        except Exception as e:
+            log_message(f"Error copying binaries: {str(e)}", error=True)
+
+        # Return to the original directory
+        os.chdir("..")
+
+        # Make sure we have the calibration data
+        if not os.path.exists(os.path.join("llama.cpp", "groups_merged.txt")):
+            log_message("Copying calibration data...")
+            if os.path.exists("groups_merged.txt"):
+                subprocess.run(["cp", "groups_merged.txt", "llama.cpp/"], check=True)
+
+        log_message("llama.cpp setup completed successfully")
+        return True
+    except Exception as e:
+        log_message(f"Error setting up llama.cpp: {str(e)}", error=True)
+        traceback.print_exc()
+        return False
+
 def process_model():
     """Process the model to create all quantized versions"""
     if processing_lock.locked():
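One caveat in the build section of check_llama_cpp above: os.chdir("..") is only reached on success, so if a cmake call raises, the except handler runs with the process still inside llama.cpp/. A self-restoring chdir is the usual guard; this is an illustrative sketch, not code from the commit:

import os
from contextlib import contextmanager

@contextmanager
def pushd(path):
    """Temporarily change the working directory, restoring it even if the body raises."""
    prev = os.getcwd()
    os.chdir(path)
    try:
        yield
    finally:
        os.chdir(prev)

# Usage sketch: run the cmake steps inside `with pushd("llama.cpp"): ...`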
@@ -182,6 +254,13 @@ def process_model():

     with processing_lock:
         try:
+            # Check llama.cpp is set up
+            if not check_llama_cpp():
+                log_message("Failed to set up llama.cpp, aborting", error=True)
+                current_status["status"] = "Error setting up llama.cpp"
+                save_status()
+                return
+
             # Validate authentication
             try:
                 user_info = whoami()
@@ -248,11 +327,26 @@ def process_model():
             fp16_path = str(outdir / f"{model_name}.fp16.gguf")
             log_message(f"Converting model to FP16: {fp16_path}")

+            # Check if the converter script exists
+            if not os.path.exists(CONVERSION_SCRIPT):
+                log_message(f"Converter script not found at {CONVERSION_SCRIPT}, searching for alternatives", error=True)
+                for root, dirs, files in os.walk("llama.cpp"):
+                    for file in files:
+                        if file.startswith("convert") and file.endswith(".py"):
+                            global CONVERSION_SCRIPT
+                            CONVERSION_SCRIPT = os.path.join(root, file)
+                            log_message(f"Found converter at {CONVERSION_SCRIPT}")
+                            break
+
+            log_message(f"Using converter script: {CONVERSION_SCRIPT}")
+
             result = subprocess.run([
                 "python", CONVERSION_SCRIPT, str(local_dir), "--outtype", "f16", "--outfile", fp16_path
             ], shell=False, capture_output=True, text=True)

             if result.returncode != 0:
+                log_message(f"Converter stderr: {result.stderr}")
+                log_message(f"Converter stdout: {result.stdout}")
                 raise Exception(f"Error converting to fp16: {result.stderr}")

             log_message("Model converted to fp16 successfully!")
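A note on the fallback just added: CPython raises "name ... is used prior to global declaration" when a global statement follows a use of the name in the same function body, and CONVERSION_SCRIPT is read by the `if not os.path.exists(...)` test a few lines earlier, so as written this block is likely to fail at import time. A common shape is a module-level resolver that declares the global before any use; the function name below is illustrative:

import os

CONVERSION_SCRIPT = "./llama.cpp/convert-hf-to-gguf.py"

def resolve_conversion_script() -> str:
    """Illustrative resolver: declare global before the name is ever read."""
    global CONVERSION_SCRIPT
    if not os.path.exists(CONVERSION_SCRIPT):
        for root, _dirs, files in os.walk("llama.cpp"):
            for name in files:
                if name.startswith("convert") and name.endswith(".py"):
                    CONVERSION_SCRIPT = os.path.join(root, name)
                    return CONVERSION_SCRIPT
    return CONVERSION_SCRIPT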
@@ -262,13 +356,22 @@ def process_model():
             train_data_path = "llama.cpp/groups_merged.txt" # Default calibration dataset

             if not os.path.isfile(train_data_path):
-                log_message(f"Warning: Training data file not found
-
-
+                log_message(f"Warning: Training data file not found at {train_data_path}, searching alternatives...")
+                # Try to find it elsewhere
+                if os.path.exists("groups_merged.txt"):
+                    train_data_path = "groups_merged.txt"
+                    log_message(f"Found training data at {train_data_path}")
+                else:
+                    log_message("Calibration data not found. Some quantizations may not work.", error=True)
+
+            try:
+                if os.path.isfile(train_data_path):
                     generate_importance_matrix(fp16_path, train_data_path, imatrix_path)
-
-                log_message(f"Error generating importance matrix: {str(e)}", error=True)
+                else:
                     imatrix_path = None
+            except Exception as e:
+                log_message(f"Error generating importance matrix: {str(e)}", error=True)
+                imatrix_path = None

             # Process each quantization type
             total_quants = len(QUANT_CONFIGS)
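generate_importance_matrix is defined elsewhere in app.py and does not appear in this diff; presumably it wraps the llama-imatrix binary built by check_llama_cpp. A rough sketch of such a wrapper follows; the binary path and the -m/-f/-o flags reflect common llama.cpp imatrix usage and should be treated as assumptions:

import subprocess

def run_imatrix(fp16_path: str, train_data_path: str, imatrix_path: str) -> None:
    """Sketch: compute an importance matrix for the imatrix-based quant types."""
    subprocess.run(
        ["./llama.cpp/llama-imatrix", "-m", fp16_path, "-f", train_data_path, "-o", imatrix_path],
        check=True,
    )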
@@ -474,7 +577,8 @@ def create_ui():
             check_button = gr.Button("Check for Updates", variant="primary")
             process_button = gr.Button("Force Processing", variant="secondary")

-
+            # Remove the 'label' parameter since it's not supported
+            progress_bar = gr.Progress()

         with gr.Tab("Quantization Status"):
             quant_status = gr.DataFrame(
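Context for the comment above: gr.Progress is not a layout component and takes no label; in Gradio it is injected as a default argument of an event handler and driven from inside it, so instantiating it in create_ui has no visible effect by itself. A minimal usage sketch; the handler and output names are illustrative:

import time
import gradio as gr

def run_processing(progress=gr.Progress()):
    """Illustrative handler: the progress tracker renders while this event runs."""
    steps = 5
    for i in range(steps):
        progress((i + 1) / steps, desc=f"Quantizing {i + 1}/{steps}")
        time.sleep(0.1)
    return "done"

# e.g. process_button.click(run_processing, outputs=status_textbox)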
@@ -540,6 +644,9 @@ def initialize():
     # Load status from file
     load_status()

+    # Check and setup llama.cpp
+    check_llama_cpp()
+
     # Schedule regular checks for updates
     scheduler = BackgroundScheduler()
     scheduler.add_job(check_and_process, 'interval', minutes=60) # Check every hour
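The lines above only register the hourly job; with APScheduler nothing runs until the scheduler's start() is called, which initialize() presumably does just past the visible context. A minimal sketch of the pattern:

from apscheduler.schedulers.background import BackgroundScheduler

def check_and_process():
    ...  # presumably wraps check_for_updates() / process_model() in app.py

scheduler = BackgroundScheduler()
scheduler.add_job(check_and_process, "interval", minutes=60)
scheduler.start()  # without start(), the hourly job never fires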
start.sh CHANGED
@@ -1,31 +1,75 @@
 #!/bin/bash

-
-
-
-
+echo "Starting setup process for auto-gguf-quant..."
+
+# Make sure we have a clean environment
+if [ -d "llama.cpp" ]; then
+    echo "Found existing llama.cpp directory, removing it for a fresh start..."
+    rm -rf llama.cpp
 fi

-#
-
-
-
+# Clone llama.cpp with specific version that works well
+echo "Cloning llama.cpp repository..."
+git clone https://github.com/ggerganov/llama.cpp
+
+# Copy calibration data to llama.cpp
+echo "Copying calibration data..."
+if [ -f "groups_merged.txt" ]; then
+    cp groups_merged.txt llama.cpp/
+else
+    echo "Warning: groups_merged.txt not found in current directory."
 fi

+# Build the tools
+echo "Building llama.cpp tools..."
+cd llama.cpp
+
 # Disable CUDA for HF spaces (not supported in free tier)
-# We should still build with optimizations for CPU
 export GGML_CUDA=OFF
+# Enable CPU optimizations
 export GGML_AVX=1
 export GGML_AVX2=1

-
-echo "
+# List all the files to make sure the converter is there
+echo "Listing files in llama.cpp directory:"
+ls -la
+
+# Check for the existence of the converter script
+echo "Checking for converter script..."
+if [ -f "convert_hf_to_gguf.py" ]; then
+    echo "Found converter script: convert_hf_to_gguf.py"
+elif [ -f "convert-hf-to-gguf.py" ]; then
+    echo "Found converter script: convert-hf-to-gguf.py"
+else
+    echo "Warning: Converter script not found in the expected locations."
+    echo "Searching for any converter script..."
+    find . -name "convert*.py" | grep -i hf
+fi
+
+# Build with CMake
+echo "Building with CMake..."
+mkdir -p build
 cmake -B build -DBUILD_SHARED_LIBS=OFF
 cmake --build build --config Release -j --target llama-quantize llama-gguf-split llama-imatrix
+
 echo "Copying built binaries..."
-
+# First try bin directory (newer llama.cpp versions)
+if [ -d "build/bin" ]; then
+    cp build/bin/llama-* ./ 2>/dev/null
+    echo "Copied from build/bin/"
+    ls -la ./llama-*
+else
+    # Try direct build directory (older versions)
+    cp build/llama-* ./ 2>/dev/null
+    echo "Copied from build/"
+    ls -la ./llama-*
+fi
+
+# Cleanup build directory to save space
+echo "Cleaning up build directory..."
 rm -rf build

 cd ..
-
+
+echo "Setup complete. Starting Gradio app..."
 python app.py
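The binary-copy step above mirrors the one inside check_llama_cpp in app.py. For reference, a compact Python equivalent using only the standard library; the directory arguments are assumptions matching the script's layout:

import glob
import os
import shutil

def copy_llama_binaries(build_dir: str = "llama.cpp/build", dest: str = "llama.cpp") -> list:
    """Copy the built llama-* tools from build/bin (newer trees) or build/ (older trees)."""
    bin_dir = os.path.join(build_dir, "bin")
    src_dir = bin_dir if os.path.isdir(bin_dir) else build_dir
    copied = []
    for path in glob.glob(os.path.join(src_dir, "llama-*")):
        shutil.copy2(path, dest)
        copied.append(path)
    return copied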