# DuckDB-SQL-Eval / app.py
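#
# Gradio front-end for the DuckDB SQL evaluation harness. Assumes
# evaluation_logic provides run_evaluation(inference_api, model_name,
# prompt_format) as a generator yielding one result string at a time,
# plus AVAILABLE_PROMPT_FORMATS, the full list of supported formats.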
import gradio as gr
from evaluation_logic import run_evaluation, AVAILABLE_PROMPT_FORMATS


def gradio_run_evaluation(inference_api, model_name, prompt_format):
    """Stream evaluation progress: re-yield the accumulated output after
    every result so the output textbox updates incrementally."""
    output = []
    for result in run_evaluation(inference_api, str(model_name).strip(), prompt_format):
        output.append(result)
        yield "\n".join(output)
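
# Minimal usage sketch (hypothetical model slug; requires OpenRouter access):
#   for text in gradio_run_evaluation("openrouter", "mistralai/mistral-nemo", "duckdbinst"):
#       print(text)  # each yield is the cumulative log so far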


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# DuckDB SQL Evaluation App")

    inference_api = gr.Dropdown(
        label="Inference API",
        choices=["openrouter"],
        value="openrouter",
    )
    model_name = gr.Textbox(label="Model Name (e.g., qwen/qwen-2.5-72b-instruct)")
    gr.Markdown("[View OpenRouter Models](https://openrouter.ai/models?order=top-weekly)")
    prompt_format = gr.Dropdown(
        label="Prompt Format",
        # Subset of AVAILABLE_PROMPT_FORMATS; swap in the full list to expose every format.
        choices=["duckdbinst", "duckdbinstgraniteshort"],
        value="duckdbinstgraniteshort",
    )

    gr.Examples(
        examples=[
            ["openrouter", "qwen/qwen-2.5-72b-instruct", "duckdbinst"],
            ["openrouter", "meta-llama/llama-3.2-3b-instruct:free", "duckdbinstgraniteshort"],
            ["openrouter", "mistralai/mistral-nemo", "duckdbinst"],
        ],
        inputs=[inference_api, model_name, prompt_format],
    )

    start_btn = gr.Button("Start Evaluation")
    output = gr.Textbox(label="Output", lines=20)

    start_btn.click(
        fn=gradio_run_evaluation,
        inputs=[inference_api, model_name, prompt_format],
        outputs=output,
    )

# queue() enables the generator-based streaming output.
if __name__ == "__main__":
    demo.queue().launch()
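
# Running locally (a sketch; the exact credential variable that
# evaluation_logic reads is an assumption):
#   pip install gradio
#   OPENROUTER_API_KEY=sk-... python app.py
# Then pick a model slug from https://openrouter.ai/models and press
# "Start Evaluation"; results stream into the Output textbox.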