# DuckDB-SQL-Eval / app.py
# Last commit by tdoehmen: "just use openrouter" (f9d0ccd)
# File size: 2.84 kB
import gradio as gr
import subprocess
import os
import re
from datetime import datetime
# Directory (relative to the app's working directory) that holds the
# duckdb-nsql checkout; both CLI scripts are run from inside it.
_NSQL_DIR = "duckdb-nsql"


def _build_inference_cmd(model_name):
    """Return the argv list that runs eval/predict.py for *model_name*."""
    return [
        "python", "eval/predict.py",
        "predict",
        "eval/data/dev.json",
        "eval/data/tables.json",
        "--output-dir", "output/",
        "--stop-tokens", ";",
        "--max-tokens", "30000",
        "--overwrite-manifest",
        "--manifest-client", "openrouter",
        "--manifest-engine", model_name,
        "--prompt-format", "duckdbinstgraniteshort",
    ]


def _build_eval_cmd(json_file):
    """Return the argv list that runs eval/evaluate.py on output/<json_file>."""
    return [
        "python", "eval/evaluate.py", "evaluate",
        "--gold", "eval/data/dev.json",
        "--db", "eval/data/databases/",
        "--tables", "eval/data/tables.json",
        "--output-dir", "output/",
        "--pred", f"output/{json_file}",
    ]


def run_evaluation(model_name):
    """Run inference and evaluation for an OpenRouter model and report the outcome.

    The function first runs ``eval/predict.py`` inside the ``duckdb-nsql``
    directory, extracts the name of the generated ``.json`` predictions file
    from that command's stdout, and then runs ``eval/evaluate.py`` on it.

    Commands are passed to ``subprocess.run`` as argument lists (no shell), so
    the user-supplied model name is always a single argument and cannot inject
    shell syntax.

    Args:
        model_name: Model identifier forwarded as ``--manifest-engine``.
            Surrounding whitespace is ignored.

    Returns:
        A human-readable string: either an ``Error: ...`` message when the
        preconditions are not met, or the progress/error messages collected
        while running, joined by blank lines.
    """
    # Use the secret OpenRouter API key from the Hugging Face space
    if "OPENROUTER_API_KEY" not in os.environ:
        return "Error: OPENROUTER_API_KEY not found in environment variables."

    # Reject missing/blank input early; otherwise the CLI would receive a
    # malformed argument list.
    model_name = (model_name or "").strip()
    if not model_name:
        return "Error: model name must not be empty."

    results = []
    # The copy already carries OPENROUTER_API_KEY for the child processes.
    env = os.environ.copy()

    try:
        # Run inference
        inference_result = subprocess.run(
            _build_inference_cmd(model_name),
            cwd=_NSQL_DIR,
            check=True,
            capture_output=True,
            text=True,
            env=env,
        )
        results.append("Inference completed.")

        # Extract JSON file path from inference output: the first stdout line
        # containing ".json" is used, and only its final path component is kept.
        json_path_match = re.search(r'(.*\.json)', inference_result.stdout)
        if not json_path_match:
            raise ValueError("Could not find JSON file path in inference output")
        json_file = os.path.basename(json_path_match.group(1))
        results.append(f"Generated JSON file: {json_file}")

        # Run evaluation
        eval_result = subprocess.run(
            _build_eval_cmd(json_file),
            cwd=_NSQL_DIR,
            check=True,
            capture_output=True,
            text=True,
            env=env,
        )

        # Extract and format metrics from eval output
        metrics = eval_result.stdout
        if metrics:
            results.append(f"Evaluation completed:\n{metrics}")
        else:
            results.append("Evaluation completed, but couldn't get metrics.")
    except subprocess.CalledProcessError as e:
        results.append(f"Error occurred: {str(e)}")
        results.append(f"Command output: {e.output}")
        # e.output only holds stdout; surface stderr as well when present.
        if e.stderr:
            results.append(f"Command stderr: {e.stderr}")
    except Exception as e:
        # Top-level boundary for the UI: report instead of crashing the app.
        results.append(f"An unexpected error occurred: {str(e)}")

    return "\n\n".join(results)
# Build the Gradio UI: a model-name textbox, a start button, and an output
# box that shows whatever string run_evaluation returns.
with gr.Blocks() as demo:
    gr.Markdown("# DuckDB SQL Evaluation App (OpenRouter)")
    model_name = gr.Textbox(label="Model Name (e.g., qwen/qwen-2.5-72b-instruct)")
    start_btn = gr.Button("Start Evaluation")
    output = gr.Textbox(label="Output", lines=20)
    # Clicking the button passes the textbox value to run_evaluation and
    # writes its return value into the output box.
    start_btn.click(fn=run_evaluation, inputs=[model_name], outputs=output)

# Start the server only when executed as a script, so importing this module
# (e.g. to reuse run_evaluation) does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()