import gradio as gr
import subprocess
import os
import re
from datetime import datetime

def _run_repo_script(args):
    """Run ``python <args...>`` from inside the ``duckdb-nsql`` directory.

    The command is passed as an argument list (no shell), so values coming
    from the UI or from a previous command's output are never interpreted
    as shell syntax. The child process inherits the current environment,
    including ``OPENROUTER_API_KEY``.

    Args:
        args: Sequence of command-line arguments placed after ``python``.

    Returns:
        The ``subprocess.CompletedProcess`` with text ``stdout``/``stderr``.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
        OSError: If the interpreter or the working directory is missing.
    """
    return subprocess.run(
        ["python", *args],
        cwd="duckdb-nsql",
        check=True,
        capture_output=True,
        text=True,
    )


def run_evaluation(model_name):
    """Run inference and evaluation for ``model_name`` and return a text log.

    First runs ``eval/predict.py`` with the OpenRouter manifest client, then
    feeds the JSON file it reports on stdout into ``eval/evaluate.py``.

    Args:
        model_name: Model identifier forwarded as ``--manifest-engine``
            (e.g. ``qwen/qwen-2.5-72b-instruct``).

    Returns:
        A human-readable string: either a single ``Error: ...`` line when a
        precondition fails, or the progress messages (and any failure
        details) joined by blank lines. Exceptions are reported in the
        returned text rather than raised.
    """
    # The key is provided as a secret of the Hugging Face space.
    if "OPENROUTER_API_KEY" not in os.environ:
        return "Error: OPENROUTER_API_KEY not found in environment variables."

    model_name = (model_name or "").strip()
    if not model_name:
        return "Error: model name must not be empty."
    if model_name.startswith("-"):
        # Would otherwise be parsed as a command-line option by predict.py.
        return "Error: model name must not start with '-'."

    results = []
    try:
        # Run inference
        inference_result = _run_repo_script([
            "eval/predict.py",
            "predict",
            "eval/data/dev.json",
            "eval/data/tables.json",
            "--output-dir", "output/",
            "--stop-tokens", ";",
            "--max-tokens", "30000",
            "--overwrite-manifest",
            "--manifest-client", "openrouter",
            "--manifest-engine", model_name,
            "--prompt-format", "duckdbinstgraniteshort",
        ])
        results.append("Inference completed.")

        # Extract JSON file path from inference output. Only the base name
        # is kept, which also drops any text preceding the path on the
        # matched line.
        json_path_match = re.search(r'(.*\.json)', inference_result.stdout)
        if not json_path_match:
            raise ValueError("Could not find JSON file path in inference output")
        json_file = os.path.basename(json_path_match.group(1))
        results.append(f"Generated JSON file: {json_file}")

        # Run evaluation
        eval_result = _run_repo_script([
            "eval/evaluate.py",
            "evaluate",
            "--gold", "eval/data/dev.json",
            "--db", "eval/data/databases/",
            "--tables", "eval/data/tables.json",
            "--output-dir", "output/",
            "--pred", f"output/{json_file}",
        ])

        # Report the evaluation script's stdout as the metrics
        metrics = eval_result.stdout
        if metrics:
            results.append(f"Evaluation completed:\n{metrics}")
        else:
            results.append("Evaluation completed, but couldn't get metrics.")

    except subprocess.CalledProcessError as e:
        results.append(f"Error occurred: {str(e)}")
        results.append(f"Command output: {e.output}")
        # Include stderr as well: stdout alone omits anything the failing
        # script wrote to its error stream.
        if e.stderr:
            results.append(f"Command stderr: {e.stderr}")
    except Exception as e:
        # Top-level boundary for the UI callback: report instead of raising.
        results.append(f"An unexpected error occurred: {str(e)}")

    return "\n\n".join(results)

# Build the UI: a model-name input, a button that triggers the evaluation,
# and a text area that displays the string returned by run_evaluation.
with gr.Blocks() as demo:
    gr.Markdown("# DuckDB SQL Evaluation App (OpenRouter)")

    model_name = gr.Textbox(label="Model Name (e.g., qwen/qwen-2.5-72b-instruct)")
    start_btn = gr.Button("Start Evaluation")
    output = gr.Textbox(label="Output", lines=20)

    start_btn.click(fn=run_evaluation, inputs=[model_name], outputs=output)

# Start the server only when executed as a script, so importing this module
# (e.g. to call run_evaluation directly) does not launch the app.
if __name__ == "__main__":
    demo.launch()