fix
setup.sh CHANGED
@@ -1,207 +1,37 @@
 #!/bin/bash
 set -e
 
-echo "Setting up GGUF
+echo "Setting up for real GGUF quantization..."
 
-# Clone
+# Clone llama.cpp
 if [ ! -d "llama.cpp" ]; then
-    echo "
-    git clone https://github.com/ggerganov/llama.cpp
+    echo "Cloning llama.cpp repository..."
+    git clone --depth=1 https://github.com/ggerganov/llama.cpp
 fi
 
-
-# Create a mock conversion script
-echo "Creating conversion script..."
-cat > llama.cpp/convert.py << 'EOL'
-#!/usr/bin/env python3
-import os
-import sys
-import argparse
-import json
-import shutil
-from pathlib import Path
-
-def mock_convert_to_gguf(model_path, output_path, model_type):
-    """
-    Mock conversion function that creates a dummy GGUF file
-    For demonstration purposes in the free tier
-    """
-    print(f"Converting {model_path} to {output_path} as {model_type}")
-
-    # Create a mock output file with some metadata
-    model_name = os.path.basename(model_path)
-
-    # Try to get some real model info if possible
-    config_path = os.path.join(model_path, "config.json")
-    model_info = {
-        "name": model_name,
-        "type": model_type,
-        "created": "2025-03-09",
-        "format": "GGUF"
-    }
-
-    if os.path.exists(config_path):
-        try:
-            with open(config_path, 'r') as f:
-                config = json.load(f)
-                if "model_type" in config:
-                    model_info["architecture"] = config["model_type"]
-                if "vocab_size" in config:
-                    model_info["vocab_size"] = config["vocab_size"]
-        except Exception as e:
-            print(f"Error reading config: {e}")
-
-    # Save model info as a small file
-    with open(output_path, 'w') as f:
-        f.write(json.dumps(model_info, indent=2))
-
-    # Make the file a reasonable size to simulate a real model
-    # Just append some data to make it look like a real file
-    with open(output_path, 'ab') as f:
-        f.write(b'\0' * 1024 * 1024)  # Add 1MB of data
-
-    print(f"Mock conversion completed: {output_path}")
-    return True
-
-def main():
-    parser = argparse.ArgumentParser(description="Convert model to GGUF format")
-    parser.add_argument("model_path", help="Path to the model directory")
-    parser.add_argument("--outfile", help="Output file path")
-    parser.add_argument("--outtype", default="f16", help="Output type")
-    args = parser.parse_args()
-
-    try:
-        output_path = args.outfile
-        if not output_path:
-            output_path = f"{os.path.basename(args.model_path)}.gguf"
-
-        success = mock_convert_to_gguf(args.model_path, output_path, args.outtype)
-        return 0 if success else 1
-    except Exception as e:
-        print(f"Error: {e}")
-        return 1
-
-if __name__ == "__main__":
-    sys.exit(main())
-EOL
-chmod +x llama.cpp/convert.py
-
-# Create a quantization script
-echo "Creating quantization script..."
-cat > llama.cpp/quantize.py << 'EOL'
-#!/usr/bin/env python3
-import os
-import sys
-import argparse
-import json
-import shutil
-from pathlib import Path
-
-def mock_quantize(input_path, output_path, quant_type):
-    """
-    Mock quantization function that creates a dummy GGUF file
-    For demonstration purposes in the free tier
-    """
-    print(f"Quantizing {input_path} to {output_path} with {quant_type}")
-
-    # Read the input file if it exists
-    model_info = {}
-    if os.path.exists(input_path):
-        try:
-            with open(input_path, 'r') as f:
-                content = f.read(1024 * 10)  # Read first 10KB (metadata should be at start)
-                # Try to parse JSON from the beginning of the file
-                try:
-                    start_json = content.strip().split('{', 1)[0] + '{'
-                    end_json = '}'
-                    json_str = start_json + content.split(start_json, 1)[1].split(end_json, 1)[0] + end_json
-                    model_info = json.loads(json_str)
-                except:
-                    # If that fails, assume the first part might be JSON
-                    try:
-                        model_info = json.loads(content.split('\0', 1)[0])
-                    except:
-                        print("Could not parse input file metadata")
-        except Exception as e:
-            print(f"Error reading input file: {e}")
-
-    # Update with quantization info
-    model_info.update({
-        "quantization": quant_type,
-        "bits_per_token": get_bits_per_token(quant_type),
-        "quantized_at": "2025-03-09"
-    })
-
-    # Create the output file with updated metadata
-    with open(output_path, 'w') as f:
-        f.write(json.dumps(model_info, indent=2))
-
-    # Make the file a reasonable size based on the quantization type
-    size_factor = get_size_factor(quant_type)
-    with open(output_path, 'ab') as f:
-        f.write(b'\0' * int(1024 * 1024 * size_factor))  # Add data proportional to quantization
-
-    print(f"Mock quantization completed: {output_path}")
-    return True
-
-def get_bits_per_token(quant_type):
-    """Get the bits per token for the quantization type"""
-    if quant_type == "f16":
-        return 16
-    elif quant_type.startswith("Q8"):
-        return 8
-    elif quant_type.startswith("Q6"):
-        return 6
-    elif quant_type.startswith("Q5"):
-        return 5
-    elif quant_type.startswith("Q4"):
-        return 4
-    elif quant_type.startswith("Q3"):
-        return 3
-    elif quant_type.startswith("Q2"):
-        return 2
-    else:
-        return 4  # Default
-
-def get_size_factor(quant_type):
-    """Get the size factor for the quantization type (in MB)"""
-    if quant_type == "f16":
-        return 3.0
-    elif quant_type.startswith("Q8"):
-        return 1.5
-    elif quant_type.startswith("Q6"):
-        return 1.2
-    elif quant_type.startswith("Q5"):
-        return 1.0
-    elif quant_type.startswith("Q4"):
-        return 0.9
-    elif quant_type.startswith("Q3"):
-        return 0.8
-    elif quant_type.startswith("Q2"):
-        return 0.7
-    else:
-        return 1.0  # Default
-
-def main():
-    parser = argparse.ArgumentParser(description="Quantize GGUF model")
-    parser.add_argument("input_file", help="Input GGUF file")
-    parser.add_argument("output_file", help="Output quantized GGUF file")
-    parser.add_argument("quant_type", help="Quantization type")
-    args = parser.parse_args()
-
-    try:
-        success = mock_quantize(args.input_file, args.output_file, args.quant_type)
-        return 0 if success else 1
-    except Exception as e:
-        print(f"Error: {e}")
-        return 1
+cd llama.cpp
+
+# Get conversion script
+echo "Setting up conversion script..."
+if [ -f "convert.py" ]; then
+    echo "Found existing convert.py script"
+elif [ -f "convert-hf-to-gguf.py" ]; then
+    echo "Found convert-hf-to-gguf.py"
+    cp convert-hf-to-gguf.py convert.py
+elif [ -f "examples/convert-hf-to-gguf.py" ]; then
+    echo "Found examples/convert-hf-to-gguf.py"
+    cp examples/convert-hf-to-gguf.py convert.py
+else
+    echo "Cannot find conversion script. Using Python alternative."
+    # Install required packages
+    pip install -q transformers torch
+fi
 
-
-
-EOL
-chmod +x llama.cpp/quantize.py
+# Install required packages for the conversion script
+pip install -q transformers torch
 
 # Initialize state file
+cd ..
 if [ ! -f "state.json" ]; then
     echo "Initializing state file..."
     echo '{"last_checked": null, "last_commit_hash": null, "is_up_to_date": true, "is_processing": false, "current_quant": null, "progress": 0, "total_quants": 12, "completed_quants": [], "failed_quants": [], "out_of_memory": false, "last_error": null, "status_message": "Ready to check for updates"}' > state.json
@@ -215,3 +45,4 @@ mkdir -p temp_outputs
 echo "Setup completed successfully"
 
 
+
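
After this commit, the heavy lifting is done by the real llama.cpp checkout instead of the mock scripts. For orientation, here is a minimal sketch of the convert-then-quantize flow the new setup enables; the model directory ./model, the build commands, and the llama-quantize target name are assumptions (llama.cpp has renamed its conversion script and quantize binary across revisions), not part of this commit:

# Sketch only -- paths and tool names below are assumptions, not from this commit.
# Convert a Hugging Face checkpoint to GGUF using the script staged as convert.py:
python llama.cpp/convert.py ./model --outfile model-f16.gguf --outtype f16

# Build llama.cpp's quantizer and produce a 4-bit Q4_K_M file:
cmake -B llama.cpp/build llama.cpp
cmake --build llama.cpp/build --target llama-quantize --config Release
./llama.cpp/build/bin/llama-quantize model-f16.gguf model-Q4_K_M.gguf Q4_K_M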
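
The state.json initialized by the script is the contract between the worker and the UI: flags such as is_processing, current_quant, and status_message drive the progress display. As an illustration (not code from this Space), a worker step could update the file from the shell with jq, which this script does not install; Python's json module is the dependency-free alternative:

# Sketch only: mark a quantization as started in state.json (assumes jq is available).
jq '.is_processing = true
    | .current_quant = "Q4_K_M"
    | .status_message = "Quantizing Q4_K_M"' state.json > state.json.tmp \
  && mv state.json.tmp state.json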