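"""Gradio Space that watches Sculptor-AI/Ursa_Minor for new commits, converts the
model to GGUF at several quantization levels via llama.cpp, and uploads each
result to its own Hugging Face repo."""
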
import os
import subprocess
import time
import json
from datetime import datetime
import threading
import logging
import gradio as gr
from huggingface_hub import HfApi
import shutil

# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Constants
SOURCE_REPO = "Sculptor-AI/Ursa_Minor"
HF_TOKEN = os.environ.get("HF_TOKEN")
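# HF_TOKEN must be provided (e.g. as a Space secret) for repo creation and
# uploads to work; warn early instead of failing deep inside the worker thread.
if not HF_TOKEN:
    logger.warning("HF_TOKEN is not set; Hub uploads will fail.")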
CONVERSION_SCRIPT = "./llama.cpp/convert.py"
MODEL_CACHE_DIR = "model_cache"
TEMP_DIR = "temp_outputs"
# Restored full quantization set, sorted from smallest to largest
QUANT_CONFIGS = [
    {"name": "Q2_K", "size_gb": 0.8, "notes": "smallest size"},
    {"name": "Q3_K_S", "size_gb": 0.9, "notes": "small size"},
    {"name": "Q3_K_M", "size_gb": 0.9, "notes": "lower quality"},
    {"name": "Q3_K_L", "size_gb": 1.0, "notes": ""},
    {"name": "IQ4_XS", "size_gb": 1.0, "notes": ""},
    {"name": "Q4_K_S", "size_gb": 1.0, "notes": "fast, recommended"},
    {"name": "Q4_K_M", "size_gb": 1.1, "notes": "fast, recommended"},
    {"name": "Q5_K_S", "size_gb": 1.2, "notes": "good balance"},
    {"name": "Q5_K_M", "size_gb": 1.2, "notes": ""},
    {"name": "Q6_K", "size_gb": 1.4, "notes": "very good quality"},
    {"name": "Q8_0", "size_gb": 1.7, "notes": "fast, best quality"},
    {"name": "f16", "size_gb": 3.2, "notes": "16 bpw, full precision"}
]
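# size_gb values are rough estimates for this model; they feed the disk-space
# check before each run and the generated README, nothing else.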

# State variables
state = {
    "last_checked": None,
    "last_commit_hash": None,
    "is_up_to_date": True,
    "is_processing": False,
    "current_quant": None,
    "progress": 0,
    "total_quants": len(QUANT_CONFIGS),
    "completed_quants": [],
    "failed_quants": [],
    "out_of_memory": False,
    "last_error": None,
    "status_message": "Ready to check for updates"
}
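# NOTE: `state` is mutated by the Gradio handlers, the worker thread, and the
# scheduler thread without a lock. The accesses are coarse enough to tolerate
# that here, but wrapping updates in a threading.Lock would be strictly safer.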

# Initialize HF API
hf_api = HfApi(token=HF_TOKEN)

# Set up llama.cpp tools on first run
if not os.path.exists(CONVERSION_SCRIPT):
    try:
        logger.info("Setting up llama.cpp tools...")
        subprocess.run(["bash", "setup.sh"], check=True)
        logger.info("Setup completed successfully")
    except subprocess.CalledProcessError as e:
        logger.error(f"Error setting up llama.cpp tools: {e}")
        raise

# Helper functions
def save_state():
    with open("state.json", "w") as f:
        # Create a serializable copy of the state
        serializable_state = state.copy()
        serializable_state["last_checked"] = str(serializable_state["last_checked"]) if serializable_state["last_checked"] else None
        json.dump(serializable_state, f)
        
def load_state():
    global state
    try:
        if os.path.exists("state.json"):
            with open("state.json", "r") as f:
                loaded_state = json.load(f)
                # Convert string back to datetime if it exists
                if loaded_state.get("last_checked"):
                    loaded_state["last_checked"] = datetime.fromisoformat(loaded_state["last_checked"])
                state.update(loaded_state)
    except Exception as e:
        logger.error(f"Error loading state: {e}")

def get_latest_commit():
    try:
        repo_info = hf_api.repo_info(repo_id=SOURCE_REPO)
        return repo_info.sha
    except Exception as e:
        logger.error(f"Error getting latest commit: {e}")
        return None

def check_for_updates():
    global state
    
    state["last_checked"] = datetime.now()
    latest_commit = get_latest_commit()
    
    if latest_commit and latest_commit != state["last_commit_hash"]:
        logger.info(f"New commit detected: {latest_commit}")
        state["last_commit_hash"] = latest_commit
        state["is_up_to_date"] = False
        state["status_message"] = f"Updates detected in {SOURCE_REPO}. Ready to generate quantizations."
    else:
        state["is_up_to_date"] = True
        state["status_message"] = f"No updates detected in {SOURCE_REPO}. Last checked: {state['last_checked'].strftime('%Y-%m-%d %H:%M:%S')}"
    
    save_state()
    return state["status_message"]

def download_model():
    try:
        # Create cache directory if it doesn't exist
        os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
        
        # Clean up any previous downloads to save space
        if os.path.exists(os.path.join(MODEL_CACHE_DIR, os.path.basename(SOURCE_REPO))):
            shutil.rmtree(os.path.join(MODEL_CACHE_DIR, os.path.basename(SOURCE_REPO)))
        
        # Get model repo information to find the smallest safetensors file
        logger.info(f"Getting repository information for {SOURCE_REPO}")
        files = hf_api.list_repo_files(repo_id=SOURCE_REPO)
        
        # Filter for safetensors files (which are the model weights)
        safetensors_files = [f for f in files if f.endswith(".safetensors")]
        
        if not safetensors_files:
            raise Exception(f"No safetensors files found in {SOURCE_REPO}")
        
        # Download only the required files instead of the entire repo to save space:
        # model/tokenizer configs plus every weights shard (sharded checkpoints also
        # need the index file mapping tensors to shards). Names not present in the
        # repo are skipped by the loop below.
        required_files = [
            "config.json",
            "tokenizer.json",
            "tokenizer_config.json",
            "tokenizer.model",
            "model.safetensors.index.json",
        ] + safetensors_files
        
        # Create the model directory
        model_dir = os.path.join(MODEL_CACHE_DIR, os.path.basename(SOURCE_REPO))
        os.makedirs(model_dir, exist_ok=True)
        
        # Download only the required files
        for file in required_files:
            if file in files:
                logger.info(f"Downloading {file}")
                hf_api.hf_hub_download(
                    repo_id=SOURCE_REPO,
                    filename=file,
                    local_dir=model_dir,
                    token=HF_TOKEN
                )
        
        return model_dir
    except Exception as e:
        logger.error(f"Error downloading model: {e}")
        state["last_error"] = str(e)
        return None

def process_quantization():
    global state
    
    if state["is_processing"]:
        return "Already processing quantizations. Please wait."
    
    state["is_processing"] = True
    state["progress"] = 0
    state["completed_quants"] = []
    state["failed_quants"] = []
    state["out_of_memory"] = False
    state["last_error"] = None
    state["status_message"] = "Starting quantization process..."
    
    # Start the processing in a separate thread
    thread = threading.Thread(target=quantization_worker)
    thread.daemon = True
    thread.start()
    
    return "Quantization process started. Please wait for it to complete."

def quantization_worker():
    global state
    
    try:
        # Download the model
        model_path = download_model()
        if not model_path:
            state["is_processing"] = False
            state["status_message"] = "Failed to download model. Check logs for details."
            return
        
        # Create temporary output directory
        os.makedirs(TEMP_DIR, exist_ok=True)
        
        # Get model name from the source repo
        model_name = os.path.basename(SOURCE_REPO).lower()
        
        # Process each quantization configuration - we'll do one at a time to save memory
        total_quants = len(QUANT_CONFIGS)
        
        for i, quant_config in enumerate(QUANT_CONFIGS):
            if state["out_of_memory"]:
                # Skip further processing if we've hit memory limits
                break
                
            quant_name = quant_config["name"]
            state["current_quant"] = quant_name
            state["progress"] = (i / total_quants) * 100
            state["status_message"] = f"Processing {quant_name} quantization ({i+1}/{total_quants})"
            
            logger.info(f"Processing quantization: {quant_name}")
            
            try:
                # Free up memory between quantizations - this is crucial for the free tier
                if i > 0:
                    # Clean up previous files
                    for file in os.listdir(TEMP_DIR):
                        file_path = os.path.join(TEMP_DIR, file)
                        if os.path.isfile(file_path):
                            os.remove(file_path)
                
                # Output path for this quantization
                quant_output_path = os.path.join(TEMP_DIR, f"{model_name}-{quant_name.lower()}.gguf")
                
                # Check available disk space before starting
                try:
                    statvfs = os.statvfs(TEMP_DIR)
                    free_space_gb = (statvfs.f_frsize * statvfs.f_bavail) / (1024 * 1024 * 1024)
                    logger.info(f"Available disk space: {free_space_gb:.2f} GB")
                    
                    # Skip if we don't have enough disk space
                    if free_space_gb < quant_config["size_gb"] * 1.5:  # 50% buffer
                        logger.warning(f"Not enough disk space for {quant_name} quantization. Need {quant_config['size_gb'] * 1.5:.2f} GB, have {free_space_gb:.2f} GB")
                        state["failed_quants"].append(f"{quant_name} (disk space)")
                        continue
                except Exception as e:
                    logger.warning(f"Could not check disk space: {e}")
                
                # Run the conversion+quantization in one step to save memory.
                # NOTE: llama.cpp's convert.py has historically accepted only
                # f32/f16/q8_0 for --outtype, so the K-quant entries in QUANT_CONFIGS
                # may need a second pass through the separate `llama-quantize` binary.
                logger.info(f"Converting and quantizing directly to {quant_name}")

                quantize_cmd = [
                    "python",
                    CONVERSION_SCRIPT,
                    model_path,
                    "--outfile", quant_output_path,
                    "--outtype", quant_name.lower()
                ]
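                # Untested fallback sketch in case --outtype rejects a K-quant:
                # convert to f16 once, then quantize with llama.cpp's separate tool:
                #   python ./llama.cpp/convert.py <model_dir> --outfile f16.gguf --outtype f16
                #   ./llama.cpp/llama-quantize f16.gguf <out>.gguf Q4_K_M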
                
                # Launch the conversion. stdout is discarded (nothing reads it during
                # the polling loop below, and an unread PIPE can fill up and stall the
                # child); stderr is kept for the post-mortem OOM check and is assumed
                # to stay small enough not to block.
                quantize_process = subprocess.Popen(
                    quantize_cmd,
                    stdout=subprocess.DEVNULL,
                    stderr=subprocess.PIPE,
                    text=True
                )
                
                # Poll the process and monitor system resources
                while quantize_process.poll() is None:
                    # Check if we're getting low on memory
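                    # (/proc/meminfo is Linux-only, which matches the Spaces runtime)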
                    try:
                        with open('/proc/meminfo', 'r') as f:
                            meminfo = f.read()
                        
                        # Extract available memory
                        available_mem = 0
                        for line in meminfo.split('\n'):
                            if 'MemAvailable:' in line:
                                available_mem = int(line.split()[1]) / 1024  # Convert to MB
                                break
                        
                        # If memory is critically low (less than 500MB), kill the process
                        if available_mem < 500:
                            logger.warning(f"Memory critically low ({available_mem:.2f} MB). Terminating quantization.")
                            quantize_process.terminate()
                            state["out_of_memory"] = True
                            state["failed_quants"].append(f"{quant_name} (OOM)")
                            break
                    except Exception as e:
                        logger.warning(f"Could not check memory usage: {e}")
                    
                    # Wait a bit before checking again
                    time.sleep(5)
                
                # If the resource monitor flagged low memory, the process has already
                # been asked to terminate; reap it and move on to the next quantization
                # (the failure was recorded inside the monitoring loop).
                if state["out_of_memory"]:
                    try:
                        quantize_process.wait(timeout=10)
                    except subprocess.TimeoutExpired:
                        quantize_process.kill()
                    continue

                # Get process output (stdout was discarded at launch)
                _, stderr = quantize_process.communicate()
                
                if quantize_process.returncode != 0:
                    if "out of memory" in stderr.lower() or "allocation failed" in stderr.lower() or "not enough memory" in stderr.lower():
                        logger.warning(f"Out of memory during {quant_name} quantization")
                        state["out_of_memory"] = True
                        state["failed_quants"].append(f"{quant_name} (OOM)")
                        continue
                    else:
                        raise Exception(f"Error during {quant_name} quantization: {stderr}")
                
                # Sanity check: the output file should exist and be at least ~1 MB
                if not os.path.exists(quant_output_path) or os.path.getsize(quant_output_path) < 1_000_000:
                    raise Exception("Quantization produced an invalid or empty file")
                
                # Create or update repository
                repo_name = f"{model_name}-{quant_name.lower()}-gguf"
                username = hf_api.whoami()["name"]
                repo_id = f"{username}/{repo_name}"
                
                try:
                    # Check if repo exists
                    hf_api.repo_info(repo_id=repo_id)
                    logger.info(f"Repository {repo_id} already exists")
                except Exception:
                    # Create repo if it doesn't exist
                    logger.info(f"Creating repository {repo_id}")
                    hf_api.create_repo(repo_id=repo_id, exist_ok=True)
                
                # Upload quantized model
                logger.info(f"Uploading quantized model to {repo_id}")
                
                # Create a simple README first (it's smaller)
                readme_content = f"""# {model_name.capitalize()} - {quant_name} GGUF

This repository contains a {quant_name} quantized GGUF version of [{SOURCE_REPO}](https://huggingface.co/{SOURCE_REPO}).

## Details

- **Quantization Type:** {quant_name}
- **Approximate Size:** {quant_config['size_gb']} GB
- **Notes:** {quant_config['notes']}
- **Original Model:** [Sculptor-AI/Ursa_Minor](https://huggingface.co/{SOURCE_REPO})
- **Auto-generated by:** GGUF Quantizer Space

## Usage with llama.cpp

```bash
# CLI
llama-cli --hf-repo {repo_id} --hf-file {model_name}-{quant_name.lower()}.gguf -p "Your prompt here"

# Server
llama-server --hf-repo {repo_id} --hf-file {model_name}-{quant_name.lower()}.gguf -c 2048
```
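
## Usage with llama-cpp-python

A minimal sketch, assuming the `llama-cpp-python` package is installed:

```python
from llama_cpp import Llama

llm = Llama.from_pretrained(repo_id="{repo_id}", filename="{model_name}-{quant_name.lower()}.gguf")
print(llm("Your prompt here", max_tokens=64)["choices"][0]["text"])
```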
"""
                
                readme_path = os.path.join(TEMP_DIR, "README.md")
                with open(readme_path, "w") as f:
                    f.write(readme_content)
                
                # Upload README first (it's smaller)
                hf_api.upload_file(
                    path_or_fileobj=readme_path,
                    path_in_repo="README.md",
                    repo_id=repo_id
                )
                
                # Then upload the model with LFS - this might take a while
                try:
                    upload_start_time = time.time()
                    max_upload_time = 60 * 60  # 1 hour max upload time
                    
                    # Create a thread to monitor the upload
                    upload_success = [False]
                    upload_error = [None]
                    upload_done = [False]
                    
                    def upload_file_with_timeout():
                        try:
                            hf_api.upload_file(
                                path_or_fileobj=quant_output_path,
                                path_in_repo=f"{model_name}-{quant_name.lower()}.gguf",
                                repo_id=repo_id
                            )
                            upload_success[0] = True
                        except Exception as e:
                            upload_error[0] = e
                        finally:
                            upload_done[0] = True
                    
                    upload_thread = threading.Thread(target=upload_file_with_timeout)
                    upload_thread.daemon = True
                    upload_thread.start()
                    
                    # Wait for the upload to finish or time out. On timeout the worker
                    # moves on, but the daemon upload thread may still be running when
                    # the temp file is deleted below, which will fail that upload.
                    while not upload_done[0]:
                        if time.time() - upload_start_time > max_upload_time:
                            logger.warning(f"Upload timed out after {max_upload_time/60:.1f} minutes")
                            break
                        time.sleep(10)
                    
                    if upload_success[0]:
                        state["completed_quants"].append(quant_name)
                        logger.info(f"Successfully processed {quant_name} quantization")
                    else:
                        error_msg = str(upload_error[0]) if upload_error[0] else "Upload timed out"
                        logger.error(f"Failed to upload quantized model: {error_msg}")
                        state["failed_quants"].append(f"{quant_name} (upload failed)")
                        state["last_error"] = error_msg
                except Exception as upload_exc:  # distinct name: don't shadow the upload_error list above
                    logger.error(f"Failed to upload quantized model: {upload_exc}")
                    state["failed_quants"].append(f"{quant_name} (upload failed)")
                    state["last_error"] = str(upload_exc)
                
                # Delete the large file immediately after upload to save space
                try:
                    os.remove(quant_output_path)
                except Exception as rm_error:
                    logger.warning(f"Could not remove temporary file: {rm_error}")
                
            except subprocess.TimeoutExpired as timeout_error:
                logger.error(f"Timeout during {quant_name} quantization: {timeout_error}")
                state["failed_quants"].append(f"{quant_name} (timeout)")
                state["last_error"] = "Quantization subprocess timed out"
            except Exception as e:
                logger.error(f"Error processing {quant_name} quantization: {e}")
                state["failed_quants"].append(quant_name)
                state["last_error"] = str(e)
        
        # Final cleanup
        try:
            shutil.rmtree(TEMP_DIR)
        except Exception as e:
            logger.warning(f"Error cleaning up temporary files: {e}")
        
        # Clean up model cache to save space
        try:
            shutil.rmtree(MODEL_CACHE_DIR)
        except Exception as e:
            logger.warning(f"Error cleaning up model cache: {e}")
        
        state["progress"] = 100
        state["is_up_to_date"] = True
        state["is_processing"] = False
        
        if state["out_of_memory"]:
            last_successful = state["completed_quants"][-1] if state["completed_quants"] else "None"
            state["status_message"] = f"Quantization process stopped due to memory limitations after {last_successful}. Smaller quantizations completed successfully."
        elif state["failed_quants"]:
            state["status_message"] = f"Quantization process completed with some failures. {len(state['completed_quants'])}/{total_quants} quantizations were successful."
        else:
            state["status_message"] = f"Quantization process completed successfully. All {len(state['completed_quants'])}/{total_quants} quantizations were created."
            
    except Exception as e:
        logger.error(f"Error in quantization worker: {e}")
        state["is_processing"] = False
        state["last_error"] = str(e)
        state["status_message"] = f"Error during quantization process: {str(e)}"
    
    save_state()

# Create Gradio interface
def create_interface():
    with gr.Blocks(title="Ursa_Minor GGUF Quantizer", css="footer {visibility: hidden}") as demo:
        with gr.Row():
            gr.Markdown("# Ursa_Minor GGUF Auto Quantizer")
        
        with gr.Row():
            with gr.Column(scale=2):
                status_md = gr.Markdown(value=f"### Status: {state['status_message']}")
            
                with gr.Row():
                    check_button = gr.Button("Check for Updates", variant="primary")
                    process_button = gr.Button("Generate Quantizations", variant="secondary")
                
                with gr.Row():
                    last_check = gr.Markdown(value=f"Last Checked: {state['last_checked'].strftime('%Y-%m-%d %H:%M:%S') if state['last_checked'] else 'Never'}")
                
                with gr.Row():
                    up_to_date = gr.Markdown(value=f"Up to Date: {'Yes' if state['is_up_to_date'] else 'No'}")
                
                with gr.Accordion("Details", open=True):
                    with gr.Row():
                        progress = gr.Slider(
                            minimum=0, 
                            maximum=100, 
                            value=state["progress"], 
                            label="Progress",
                            interactive=False
                        )
                    
                    current_task = gr.Markdown(value="")
                    
                    with gr.Row():
                        completed_md = gr.Markdown(value="### Completed Quantizations")
                        completed_list = gr.Markdown(value="None")
                    
                    with gr.Row():
                        failed_md = gr.Markdown(value="### Failed Quantizations")
                        failed_list = gr.Markdown(value="None")
                    
                    with gr.Row():
                        error_md = gr.Markdown(value="### Last Error")
                        error_text = gr.Markdown(value="None")
            
            with gr.Column(scale=1):
                gr.Markdown("### Quantization Types")
                quant_table = gr.DataFrame(
                    value=[[q["name"], f"{q['size_gb']} GB", q["notes"]] for q in QUANT_CONFIGS],
                    headers=["Type", "Size", "Notes"],
                    interactive=False
                )
        
        # Functions to update the UI
        def update_status():
            # Simply update the text components without changing button properties
            status_text = f"### Status: {state['status_message']}"
            last_check_text = f"Last Checked: {state['last_checked'].strftime('%Y-%m-%d %H:%M:%S') if state['last_checked'] else 'Never'}"
            up_to_date_text = f"Up to Date: {'Yes' if state['is_up_to_date'] else 'No'}"
            
            current_task_text = ""
            if state["is_processing"]:
                current_quant = state["current_quant"] or "Preparing"
                current_task_text = f"Current Task: Processing {current_quant} quantization"
            
            completed_text = "None"
            if state["completed_quants"]:
                completed_items = []
                for q in state["completed_quants"]:
                    model_name = os.path.basename(SOURCE_REPO).lower()
                    username = hf_api.whoami()["name"]
                    repo_id = f"{username}/{model_name}-{q.lower()}-gguf"
                    completed_items.append(f"- [{q}](https://huggingface.co/{repo_id})")
                completed_text = "\n".join(completed_items)
            
            failed_text = "None"
            if state["failed_quants"]:
                failed_items = []
                for q in state["failed_quants"]:
                    if "(" in q:  # Check if it has a reason in parentheses
                        name, reason = q.split(" (", 1)
                        reason = reason.rstrip(")")
                        failed_items.append(f"- {name} (Reason: {reason})")
                    else:
                        failed_items.append(f"- {q}")
                failed_text = "\n".join(failed_items)
            
            error_text = "None"
            if state["last_error"]:
                error_text = f"```\n{state['last_error']}\n```"
            
            return [
                status_text,
                last_check_text,
                up_to_date_text,
                state["progress"],
                current_task_text,
                completed_text,
                failed_text,
                error_text
            ]
        
        # Register event handlers
        check_button.click(
            fn=check_for_updates,
            outputs=[status_md]
        ).then(
            fn=update_status,
            outputs=[
                status_md, 
                last_check, 
                up_to_date, 
                progress, 
                current_task, 
                completed_list, 
                failed_list, 
                error_text
            ]
        )
        
        process_button.click(
            fn=process_quantization,
            outputs=[status_md]
        ).then(
            fn=update_status,
            outputs=[
                status_md, 
                last_check, 
                up_to_date, 
                progress, 
                current_task, 
                completed_list, 
                failed_list, 
                error_text
            ]
        )
        
        # Refresh the status widgets periodically so the UI updates while the
        # worker thread runs; without `every`, demo.load fires only once per page
        # load. Polling requires the queue, which is enabled at launch.
        demo.load(
            fn=update_status,
            outputs=[
                status_md,
                last_check,
                up_to_date,
                progress,
                current_task,
                completed_list,
                failed_list,
                error_text
            ],
            every=5
        )
        
        # Schedule periodic checks for updates - but less frequently for free tier
        def scheduled_check():
            while True:
                try:
                    if not state["is_processing"]:
                        check_for_updates()
                except Exception as e:
                    logger.error(f"Error in scheduled check: {e}")
                # Check infrequently (every 4 hours) to avoid waking the Space too often
                time.sleep(14400)
        
        # Only start the scheduler thread if we're not in a debugging environment
        if not os.environ.get("GRADIO_DEBUG"):
            scheduler_thread = threading.Thread(target=scheduled_check)
            scheduler_thread.daemon = True
            scheduler_thread.start()
            logger.info("Started background update checker")
        
        return demo

# Initialize state from disk
load_state()

# Create and launch the interface
demo = create_interface()
demo.queue(max_size=10).launch(debug=True, show_api=False)
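
# On a Hugging Face Space this module runs at container start; to test locally,
# export HF_TOKEN and run the file directly (filename assumed):
#   HF_TOKEN=hf_... python app.py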