import gradio as gr
import subprocess
import spaces
import torch
import os
import re
import threading
import queue

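# On ZeroGPU Spaces, CUDA is only attached inside functions decorated with
# @spaces.GPU, so at import time this tensor still reports 'cpu'.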
zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu' 🤔

def stream_output(process, q):
    # The manifest process is opened in text mode, so readline yields str
    # lines and the sentinel must be '' rather than b''.
    for line in iter(process.stdout.readline, ''):
        q.put(line.strip())
    process.stdout.close()

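# Request a ZeroGPU allocation for the duration of this call.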
@spaces.GPU
def run_evaluation(model_name):
    print(zero.device)  # <-- 'cuda:0' 🤗

    results = []
    manifest_logs = []

    # Use the secret HF token from the Hugging Face space
    if "HF_TOKEN" not in os.environ:
        return "Error: HF_TOKEN not found in environment variables.", "Error: Cannot start manifest server without HF_TOKEN."

    manifest_process = None
    log_queue = queue.Queue()
    try:
        # Start manifest server in background with explicit CUDA_VISIBLE_DEVICES
        manifest_cmd = f"""
        cd duckdb-nsql/ && 
        CUDA_VISIBLE_DEVICES=0 HF_TOKEN={os.environ['HF_TOKEN']}  python -m manifest.api.app \
        --model_type huggingface \
        --model_generation_type text-generation \
        --model_name_or_path {model_name} \
        --fp16 \
        --device 0
        """
        manifest_process = subprocess.Popen(manifest_cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, bufsize=1)

        threading.Thread(target=stream_output, args=(manifest_process, log_queue), daemon=True).start()

        results.append("Started manifest server in background.")

        # Wait for the server to initialize (adjust time as needed)
        for _ in range(30):
            try:
                line = log_queue.get(timeout=1)
                manifest_logs.append(line)
                if "Running on" in line:  # Server is ready
                    break
            except queue.Empty:
                pass

        # Run inference
        inference_cmd = f"""
        cd duckdb-nsql/ && 
        python eval/predict.py \
        predict \
        eval/data/dev.json \
        eval/data/tables.json \
        --output-dir output/ \
        --stop-tokens ';' \
        --overwrite-manifest \
        --manifest-client huggingface \
        --manifest-connection http://localhost:5000 \
        --prompt-format duckdbinstgraniteshort
        """
        inference_result = subprocess.run(inference_cmd, shell=True, check=True, capture_output=True, text=True)
        results.append("Inference completed.")

        # Extract JSON file path from inference output
        json_path_match = re.search(r'(.*\.json)', inference_result.stdout)
        if not json_path_match:
            raise ValueError("Could not find JSON file path in inference output")
        json_file = os.path.basename(json_path_match.group(1))
        results.append(f"Generated JSON file: {json_file}")

        # Run evaluation
        eval_cmd = f"""
        cd duckdb-nsql/ && 
        python eval/evaluate.py evaluate \
        --gold eval/data/dev.json \
        --db eval/data/databases/ \
        --tables eval/data/tables.json \
        --output-dir output/ \
        --pred output/{json_file}
        """
        eval_result = subprocess.run(eval_cmd, shell=True, check=True, capture_output=True, text=True)

        # Extract and format metrics from eval output
        metrics = eval_result.stdout
        if metrics:
            results.append(f"Evaluation completed:\n{metrics}")
        else:
            results.append("Evaluation completed, but couldn't get metrics.")

    except subprocess.CalledProcessError as e:
        results.append(f"Error occurred: {str(e)}")
        results.append(f"Command output: {e.output}")
    except Exception as e:
        results.append(f"An unexpected error occurred: {str(e)}")
    finally:
        # Terminate the background manifest server
        if manifest_process:
            manifest_process.terminate()
            results.append("Terminated manifest server.")

        # Collect any remaining logs
        while True:
            try:
                line = log_queue.get_nowait()
                manifest_logs.append(line)
            except queue.Empty:
                break

    return "\n\n".join(results), "\n".join(manifest_logs)

with gr.Blocks() as demo:
    gr.Markdown("# DuckDB SQL Evaluation App")

    model_name = gr.Textbox(label="Model Name (e.g., Qwen/Qwen2.5-7B-Instruct)")
    start_btn = gr.Button("Start Evaluation")
    output = gr.Textbox(label="Evaluation Output", lines=20)
    manifest_output = gr.Textbox(label="Manifest Server Logs", lines=20)

    start_btn.click(fn=run_evaluation, inputs=[model_name], outputs=[output, manifest_output])

demo.launch()