import gradio as gr
import os
from evaluation_logic import run_evaluation
from eval.predict import PROMPT_FORMATTERS

PROMPT_TEMPLATES = {
    "duckdbinstgraniteshort": PROMPT_FORMATTERS["duckdbinstgraniteshort"]().PROMPT_TEMPLATE,
    "duckdbinst": PROMPT_FORMATTERS["duckdbinst"]().PROMPT_TEMPLATE,
}
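
# The preset templates are materialized once at import time so the UI can show
# their content; PROMPT_FORMATTERS itself is still consulted for fresh lookups
# in update_prompt_template below.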


def gradio_run_evaluation(inference_api, model_name, prompt_format, openrouter_token=None, custom_prompt=None):
    # Set the environment variable if an OpenRouter token is provided
    if inference_api == "openrouter":
        os.environ["OPENROUTER_API_KEY"] = str(openrouter_token)
    # Pass both the format name and the template content to the evaluation;
    # yield the accumulated output after every result so the UI streams progress
    output = []
    for result in run_evaluation(inference_api, str(model_name).strip(), prompt_format, custom_prompt):
        output.append(result)
        yield "\n".join(output)


def update_token_visibility(api):
    """Show the OpenRouter token input only when the OpenRouter API is selected."""
    return gr.update(visible=api == "openrouter")
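
# gr.update returns a property patch for the target component rather than a
# new component, so the handler above only toggles the textbox's visibility.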


def update_prompt_template(prompt_format):
    """Update the template content when a preset is selected."""
    if prompt_format in PROMPT_TEMPLATES:
        return PROMPT_FORMATTERS[prompt_format]().PROMPT_TEMPLATE
    return ""


def handle_template_edit(prompt_format, new_template):
    """Keep the preset name if the edited template still matches it; otherwise 'custom'."""
    # If the template matches a preset exactly, keep the preset name
    for format_name, template in PROMPT_TEMPLATES.items():
        if template.strip() == new_template.strip():
            return format_name
    # Otherwise switch to custom
    return "custom"
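
# handle_template_edit is currently unused: its custom_prompt.change wiring is
# left commented out near the bottom of this file. Re-enabling it would snap
# the dropdown back to a preset name whenever the edited template matches one
# verbatim, and to 'custom' otherwise.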


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# DuckDB SQL Evaluation App")

    with gr.Row():
        with gr.Column():
            inference_api = gr.Dropdown(
                label="Inference API",
                choices=["openrouter"],
                value="openrouter",
            )
            openrouter_token = gr.Textbox(
                label="OpenRouter API Token",
                placeholder="Enter your OpenRouter API token",
                type="password",
                visible=True,
            )
            model_name = gr.Textbox(
                label="Model Name (e.g., qwen/qwen-2.5-72b-instruct)"
            )
            gr.Markdown("[View OpenRouter Models](https://openrouter.ai/models?order=top-weekly)")

    with gr.Row():
        with gr.Column():
            # 'custom' is offered alongside the preset formats so users can
            # supply their own template
            prompt_format = gr.Dropdown(
                label="Prompt Format",
                choices=["duckdbinst", "duckdbinstgraniteshort", "custom"],
                value="duckdbinstgraniteshort",
            )
            custom_prompt = gr.TextArea(
                label="Prompt Template Content",
                placeholder="Enter your custom prompt template here or select a preset format above.",
                lines=10,
                value=PROMPT_TEMPLATES["duckdbinstgraniteshort"],  # initial template content
            )

    gr.Examples(
        examples=[
            ["openrouter", "qwen/qwen-2.5-72b-instruct", "duckdbinst", "", ""],
            ["openrouter", "meta-llama/llama-3.2-3b-instruct:free", "duckdbinstgraniteshort", "", ""],
            ["openrouter", "mistralai/mistral-nemo", "duckdbinst", "", ""],
        ],
        inputs=[inference_api, model_name, prompt_format, openrouter_token, custom_prompt],
    )

    start_btn = gr.Button("Start Evaluation")
    output = gr.Textbox(label="Output", lines=20)

    # Show or hide the token input when the API selection changes
    inference_api.change(
        fn=update_token_visibility,
        inputs=[inference_api],
        outputs=[openrouter_token],
    )

    # Refresh the template content when a preset format is selected
    prompt_format.change(
        fn=update_prompt_template,
        inputs=[prompt_format],
        outputs=[custom_prompt],
    )

    # Switch the format dropdown when the template is edited (disabled for now;
    # see handle_template_edit above)
    # custom_prompt.change(
    #     fn=handle_template_edit,
    #     inputs=[prompt_format, custom_prompt],
    #     outputs=[prompt_format],
    # )

    start_btn.click(
        fn=gradio_run_evaluation,
        inputs=[inference_api, model_name, prompt_format, openrouter_token, custom_prompt],
        outputs=output,
    )
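
# queue() enables Gradio's request queue, which is what lets the generator's
# intermediate yields from gradio_run_evaluation stream into the Output
# textbox as they arrive.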
demo.queue().launch()