Kaileh57 committed
Commit e744ef3 · 1 Parent(s): 8c38f4a
Files changed (2):
  1. app.py +115 -8
  2. start.sh +57 -13
app.py CHANGED
@@ -12,12 +12,11 @@ os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
import gradio as gr

from huggingface_hub import HfApi, list_repo_files, hf_hub_download, login, whoami
- from huggingface_hub.hf_api import CommitInfo # This is the correct import
from apscheduler.schedulers.background import BackgroundScheduler

# MODEL_REPO to monitor
SOURCE_MODEL_REPO = "Sculptor-AI/Ursa_Minor"
- CONVERSION_SCRIPT = "./llama.cpp/convert_hf_to_gguf.py"
+ CONVERSION_SCRIPT = "./llama.cpp/convert-hf-to-gguf.py" # Updated script path
STATUS_FILE = "status.json"

# Quantization configurations in order of processing
@@ -174,6 +173,79 @@ def check_for_updates():
    save_status()
    return False

+ def check_llama_cpp():
+     """Check if llama.cpp is properly set up and build if needed"""
+     try:
+         if not os.path.exists("llama.cpp"):
+             log_message("llama.cpp directory not found, cloning repository...")
+             subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp"], check=True)
+
+         # Check for critical files
+         converter_path = os.path.join("llama.cpp", "convert-hf-to-gguf.py")
+         if not os.path.exists(converter_path):
+             # Try alternative path
+             old_converter_path = os.path.join("llama.cpp", "convert_hf_to_gguf.py")
+             if os.path.exists(old_converter_path):
+                 log_message(f"Found converter at {old_converter_path}, using this path")
+                 global CONVERSION_SCRIPT
+                 CONVERSION_SCRIPT = old_converter_path
+             else:
+                 log_message("Converter script not found, listing files in llama.cpp...")
+                 files = os.listdir("llama.cpp")
+                 log_message(f"Files in llama.cpp: {files}")
+
+                 # Search for any converter script
+                 for file in files:
+                     if file.startswith("convert") and file.endswith(".py"):
+                         log_message(f"Found alternative converter: {file}")
+                         CONVERSION_SCRIPT = os.path.join("llama.cpp", file)
+                         break
+
+         # Build the tools
+         log_message("Building llama.cpp tools...")
+         os.chdir("llama.cpp")
+
+         # Check if build directory exists
+         if not os.path.exists("build"):
+             os.makedirs("build")
+
+         # Configure and build
+         subprocess.run(["cmake", "-B", "build", "-DBUILD_SHARED_LIBS=OFF"], check=True)
+         subprocess.run(["cmake", "--build", "build", "--config", "Release", "-j", "--target", "llama-quantize", "llama-gguf-split", "llama-imatrix"], check=True)
+
+         # Copy binaries
+         log_message("Copying built binaries...")
+         try:
+             # Different builds may put binaries in different places
+             if os.path.exists(os.path.join("build", "bin")):
+                 for binary in ["llama-quantize", "llama-gguf-split", "llama-imatrix"]:
+                     src = os.path.join("build", "bin", binary)
+                     if os.path.exists(src):
+                         subprocess.run(["cp", src, "./"], check=True)
+             else:
+                 for binary in ["llama-quantize", "llama-gguf-split", "llama-imatrix"]:
+                     src = os.path.join("build", binary)
+                     if os.path.exists(src):
+                         subprocess.run(["cp", src, "./"], check=True)
+         except Exception as e:
+             log_message(f"Error copying binaries: {str(e)}", error=True)
+
+         # Return to the original directory
+         os.chdir("..")
+
+         # Make sure we have the calibration data
+         if not os.path.exists(os.path.join("llama.cpp", "groups_merged.txt")):
+             log_message("Copying calibration data...")
+             if os.path.exists("groups_merged.txt"):
+                 subprocess.run(["cp", "groups_merged.txt", "llama.cpp/"], check=True)
+
+         log_message("llama.cpp setup completed successfully")
+         return True
+     except Exception as e:
+         log_message(f"Error setting up llama.cpp: {str(e)}", error=True)
+         traceback.print_exc()
+         return False
+
def process_model():
    """Process the model to create all quantized versions"""
    if processing_lock.locked():
@@ -182,6 +254,13 @@ def process_model():

    with processing_lock:
        try:
+             # Check llama.cpp is set up
+             if not check_llama_cpp():
+                 log_message("Failed to set up llama.cpp, aborting", error=True)
+                 current_status["status"] = "Error setting up llama.cpp"
+                 save_status()
+                 return
+
            # Validate authentication
            try:
                user_info = whoami()
@@ -248,11 +327,26 @@ def process_model():
            fp16_path = str(outdir / f"{model_name}.fp16.gguf")
            log_message(f"Converting model to FP16: {fp16_path}")

+             # Check if the converter script exists
+             global CONVERSION_SCRIPT  # declared before the first use below; a later declaration would raise a SyntaxError
+             if not os.path.exists(CONVERSION_SCRIPT):
+                 log_message(f"Converter script not found at {CONVERSION_SCRIPT}, searching for alternatives", error=True)
+                 for root, dirs, files in os.walk("llama.cpp"):
+                     for file in files:
+                         if file.startswith("convert") and file.endswith(".py"):
+                             CONVERSION_SCRIPT = os.path.join(root, file)
+                             log_message(f"Found converter at {CONVERSION_SCRIPT}")
+                             break
+
+             log_message(f"Using converter script: {CONVERSION_SCRIPT}")
+
            result = subprocess.run([
                "python", CONVERSION_SCRIPT, str(local_dir), "--outtype", "f16", "--outfile", fp16_path
            ], shell=False, capture_output=True, text=True)

            if result.returncode != 0:
+                 log_message(f"Converter stderr: {result.stderr}")
+                 log_message(f"Converter stdout: {result.stdout}")
                raise Exception(f"Error converting to fp16: {result.stderr}")

            log_message("Model converted to fp16 successfully!")
@@ -262,13 +356,22 @@ def process_model():
            train_data_path = "llama.cpp/groups_merged.txt" # Default calibration dataset

            if not os.path.isfile(train_data_path):
-                 log_message(f"Warning: Training data file not found: {train_data_path}. Some quantizations may not work.", error=True)
-             else:
-                 try:
+                 log_message(f"Warning: Training data file not found at {train_data_path}, searching alternatives...")
+                 # Try to find it elsewhere
+                 if os.path.exists("groups_merged.txt"):
+                     train_data_path = "groups_merged.txt"
+                     log_message(f"Found training data at {train_data_path}")
+                 else:
+                     log_message("Calibration data not found. Some quantizations may not work.", error=True)
+
+             try:
+                 if os.path.isfile(train_data_path):
                    generate_importance_matrix(fp16_path, train_data_path, imatrix_path)
-                 except Exception as e:
-                     log_message(f"Error generating importance matrix: {str(e)}", error=True)
+                 else:
                    imatrix_path = None
+             except Exception as e:
+                 log_message(f"Error generating importance matrix: {str(e)}", error=True)
+                 imatrix_path = None

            # Process each quantization type
            total_quants = len(QUANT_CONFIGS)
@@ -474,7 +577,8 @@ def create_ui():
        check_button = gr.Button("Check for Updates", variant="primary")
        process_button = gr.Button("Force Processing", variant="secondary")

-         progress_bar = gr.Progress(label="Progress")
+         # Remove the 'label' parameter since it's not supported
+         progress_bar = gr.Progress()

        with gr.Tab("Quantization Status"):
            quant_status = gr.DataFrame(
@@ -540,6 +644,9 @@ def initialize():
    # Load status from file
    load_status()

+     # Check and setup llama.cpp
+     check_llama_cpp()
+
    # Schedule regular checks for updates
    scheduler = BackgroundScheduler()
    scheduler.add_job(check_and_process, 'interval', minutes=60) # Check every hour
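Note on the gr.Progress() change above: Gradio's Progress tracker takes no label argument and is not placed in the layout like other components; it is normally injected by declaring it as a default argument of an event handler, which then calls it with a fraction to update the bar. A minimal sketch of that pattern, with a hypothetical handler name and wiring that are not taken from this repository:

import gradio as gr

def run_processing(progress=gr.Progress()):
    # Gradio injects the Progress tracker through the default argument.
    steps = ["convert to fp16", "generate imatrix", "quantize"]
    for i, step in enumerate(steps):
        progress((i + 1) / len(steps), desc=step)  # updates the bar shown in the UI
    return "done"

with gr.Blocks() as demo:
    process_button = gr.Button("Force Processing", variant="secondary")
    result = gr.Textbox(label="Result")
    # Progress is displayed on the output component while the event runs;
    # older Gradio versions may also need demo.queue() for it to appear.
    process_button.click(run_processing, outputs=result)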
start.sh CHANGED
@@ -1,31 +1,75 @@
#!/bin/bash

- # Clone llama.cpp if not exists
- if [ ! -d "llama.cpp" ]; then
-     echo "Cloning llama.cpp repository..."
-     git clone https://github.com/ggerganov/llama.cpp
+ echo "Starting setup process for auto-gguf-quant..."
+
+ # Make sure we have a clean environment
+ if [ -d "llama.cpp" ]; then
+     echo "Found existing llama.cpp directory, removing it for a fresh start..."
+     rm -rf llama.cpp
fi

- # Copy calibration data if not exists
- if [ ! -f "llama.cpp/groups_merged.txt" ]; then
-     echo "Copying calibration data..."
-     cp groups_merged.txt llama.cpp/groups_merged.txt
+ # Clone llama.cpp with specific version that works well
+ echo "Cloning llama.cpp repository..."
+ git clone https://github.com/ggerganov/llama.cpp
+
+ # Copy calibration data to llama.cpp
+ echo "Copying calibration data..."
+ if [ -f "groups_merged.txt" ]; then
+     cp groups_merged.txt llama.cpp/
+ else
+     echo "Warning: groups_merged.txt not found in current directory."
fi

+ # Build the tools
+ echo "Building llama.cpp tools..."
+ cd llama.cpp
+
# Disable CUDA for HF spaces (not supported in free tier)
- # We should still build with optimizations for CPU
export GGML_CUDA=OFF
+ # Enable CPU optimizations
export GGML_AVX=1
export GGML_AVX2=1

- cd llama.cpp
- echo "Building llama.cpp tools..."
+ # List all the files to make sure the converter is there
+ echo "Listing files in llama.cpp directory:"
+ ls -la
+
+ # Check for the existence of the converter script
+ echo "Checking for converter script..."
+ if [ -f "convert_hf_to_gguf.py" ]; then
+     echo "Found converter script: convert_hf_to_gguf.py"
+ elif [ -f "convert-hf-to-gguf.py" ]; then
+     echo "Found converter script: convert-hf-to-gguf.py"
+ else
+     echo "Warning: Converter script not found in the expected locations."
+     echo "Searching for any converter script..."
+     find . -name "convert*.py" | grep -i hf
+ fi
+
+ # Build with CMake
+ echo "Building with CMake..."
+ mkdir -p build
cmake -B build -DBUILD_SHARED_LIBS=OFF
cmake --build build --config Release -j --target llama-quantize llama-gguf-split llama-imatrix
+
echo "Copying built binaries..."
- cp ./build/bin/llama-* ./ 2>/dev/null || cp ./build/llama-* ./ 2>/dev/null
+ # First try bin directory (newer llama.cpp versions)
+ if [ -d "build/bin" ]; then
+     cp build/bin/llama-* ./ 2>/dev/null
+     echo "Copied from build/bin/"
+     ls -la ./llama-*
+ else
+     # Try direct build directory (older versions)
+     cp build/llama-* ./ 2>/dev/null
+     echo "Copied from build/"
+     ls -la ./llama-*
+ fi
+
+ # Cleanup build directory to save space
+ echo "Cleaning up build directory..."
rm -rf build

cd ..
- echo "Starting Gradio app..."
+
+ echo "Setup complete. Starting Gradio app..."
python app.py
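For context on the targets built above: llama-imatrix and llama-quantize are the binaries the app shells out to after the FP16 conversion, and llama-gguf-split splits oversized GGUF files into shards. A rough sketch of how such calls typically look from Python; the paths, the Q4_K_M type, and the exact flags are illustrative assumptions and can differ between llama.cpp versions:

import subprocess

fp16_path = "model.fp16.gguf"            # produced by convert-hf-to-gguf.py (illustrative path)
imatrix_path = "imatrix.dat"             # importance matrix output (illustrative path)
train_data_path = "llama.cpp/groups_merged.txt"

# Build an importance matrix from the calibration text.
subprocess.run([
    "./llama.cpp/llama-imatrix",
    "-m", fp16_path,
    "-f", train_data_path,
    "-o", imatrix_path,
], check=True)

# Quantize the FP16 GGUF to Q4_K_M, using the imatrix to improve low-bit quality.
subprocess.run([
    "./llama.cpp/llama-quantize",
    "--imatrix", imatrix_path,
    fp16_path,
    "model.Q4_K_M.gguf",
    "Q4_K_M",
], check=True)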