import gradio as gr

from evaluation_logic import run_evaluation, AVAILABLE_PROMPT_FORMATS


def gradio_run_evaluation(inference_api, model_name, prompt_format):
    # Stream evaluation progress to the UI: accumulate each line yielded by
    # run_evaluation and re-yield the full transcript so the output textbox
    # updates incrementally.
    output = []
    for result in run_evaluation(inference_api, str(model_name).strip(), prompt_format):
        output.append(result)
        yield "\n".join(output)


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# DuckDB SQL Evaluation App")

    inference_api = gr.Dropdown(
        label="Inference API",
        choices=["openrouter"],
        value="openrouter",
    )
    model_name = gr.Textbox(label="Model Name (e.g., qwen/qwen-2.5-72b-instruct)")
    gr.Markdown("[View OpenRouter Models](https://openrouter.ai/models?order=top-weekly)")
    prompt_format = gr.Dropdown(
        label="Prompt Format",
        choices=["duckdbinst", "duckdbinstgraniteshort"],  # hardcoded subset of AVAILABLE_PROMPT_FORMATS
        value="duckdbinstgraniteshort",
    )

    gr.Examples(
        examples=[
            ["openrouter", "qwen/qwen-2.5-72b-instruct", "duckdbinst"],
            ["openrouter", "meta-llama/llama-3.2-3b-instruct:free", "duckdbinstgraniteshort"],
            ["openrouter", "mistralai/mistral-nemo", "duckdbinst"],
        ],
        inputs=[inference_api, model_name, prompt_format],
    )

    start_btn = gr.Button("Start Evaluation")
    output = gr.Textbox(label="Output", lines=20)
    start_btn.click(
        fn=gradio_run_evaluation,
        inputs=[inference_api, model_name, prompt_format],
        outputs=output,
    )

demo.queue().launch()
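# For local smoke testing without launching the web UI, the streaming
# generator can be driven directly. This is a hypothetical usage sketch,
# assuming run_evaluation yields one progress/result string per step (the
# same contract gradio_run_evaluation relies on for incremental updates):
#
#     for snapshot in gradio_run_evaluation("openrouter", "mistralai/mistral-nemo", "duckdbinst"):
#         print(snapshot.splitlines()[-1])  # print only the newest line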