Commit: added prompt template and openai api key

Files changed:
- app.py (+100 / -20)
- duckdb-nsql/eval/constants.py (+3 / -1)
- duckdb-nsql/eval/prompt_formatters.py (+29 / -0)
- evaluation_logic.py (+14 / -6)
app.py (CHANGED)

@@ -1,40 +1,120 @@
 import gradio as gr
+import os
+from evaluation_logic import run_evaluation
+from eval.predict import PROMPT_FORMATTERS

+PROMPT_TEMPLATES = {
+    "duckdbinstgraniteshort": PROMPT_FORMATTERS["duckdbinstgraniteshort"]().PROMPT_TEMPLATE,
+    "duckdbinst": PROMPT_FORMATTERS["duckdbinst"]().PROMPT_TEMPLATE,
+}
+
+def gradio_run_evaluation(inference_api, model_name, prompt_format, openrouter_token=None, custom_prompt=None):
+    # Set environment variable if OpenRouter token is provided
+    if inference_api == "openrouter":
+        os.environ["OPENROUTER_API_KEY"] = str(openrouter_token)
+
+    # We now pass both the format name and content to evaluation
     output = []
-    for result in run_evaluation(inference_api, str(model_name).strip(), prompt_format):
+    for result in run_evaluation(inference_api, str(model_name).strip(), prompt_format, custom_prompt):
         output.append(result)
         yield "\n".join(output)

+def update_token_visibility(api):
+    """Update visibility of the OpenRouter token input"""
+    return gr.update(visible=api == "openrouter")
+
+def update_prompt_template(prompt_format):
+    """Update the template content when a preset is selected"""
+    if prompt_format in PROMPT_TEMPLATES:
+        return PROMPT_FORMATTERS[prompt_format]()
+    return ""
+
+def handle_template_edit(prompt_format, new_template):
+    """Handle when user edits the template"""
+    # If the template matches a preset exactly, keep the preset name
+    for format_name, template in PROMPT_TEMPLATES.items():
+        if template.strip() == new_template.strip():
+            return format_name
+    # Otherwise switch to custom
+    return "custom"
+
 with gr.Blocks(gr.themes.Soft()) as demo:
     gr.Markdown("# DuckDB SQL Evaluation App")

+    with gr.Row():
+        with gr.Column():
+            inference_api = gr.Dropdown(
+                label="Inference API",
+                choices=['openrouter'],
+                value="openrouter"
+            )
+
+            openrouter_token = gr.Textbox(
+                label="OpenRouter API Token",
+                placeholder="Enter your OpenRouter API token",
+                type="password",
+                visible=True
+            )
+
+            model_name = gr.Textbox(
+                label="Model Name (e.g., qwen/qwen-2.5-72b-instruct)"
+            )
+
+            gr.Markdown("[View OpenRouter Models](https://openrouter.ai/models?order=top-weekly)")
+
-    prompt_format = gr.Dropdown(
-        label="Prompt Format",
-        choices=['duckdbinst', 'duckdbinstgraniteshort'], #AVAILABLE_PROMPT_FORMATS,
-        value="duckdbinstgraniteshort"
-    )
+    with gr.Row():
+        with gr.Column():
+            # Add 'custom' to the choices
+            prompt_format = gr.Dropdown(
+                label="Prompt Format",
+                choices=['duckdbinst', 'duckdbinstgraniteshort', 'custom'],
+                value="duckdbinstgraniteshort"
+            )
+
+            custom_prompt = gr.TextArea(
+                label="Prompt Template Content",
+                placeholder="Enter your custom prompt template here or select a preset format above.",
+                lines=10,
+                value=PROMPT_TEMPLATES['duckdbinstgraniteshort']  # Set initial value
+            )

     gr.Examples(
         examples=[
-            ["openrouter", "qwen/qwen-2.5-72b-instruct", "duckdbinst"],
-            ["openrouter", "meta-llama/llama-3.2-3b-instruct:free", "duckdbinstgraniteshort"],
-            ["openrouter", "mistralai/mistral-nemo", "duckdbinst"],
+            ["openrouter", "qwen/qwen-2.5-72b-instruct", "duckdbinst", "", PROMPT_TEMPLATES['duckdbinst']],
+            ["openrouter", "meta-llama/llama-3.2-3b-instruct:free", "duckdbinstgraniteshort", "", PROMPT_TEMPLATES['duckdbinstgraniteshort']],
+            ["openrouter", "mistralai/mistral-nemo", "duckdbinst", "", PROMPT_TEMPLATES['duckdbinst']],
         ],
-        inputs=[inference_api, model_name, prompt_format],
+        inputs=[inference_api, model_name, prompt_format, openrouter_token, custom_prompt],
     )

     start_btn = gr.Button("Start Evaluation")
     output = gr.Textbox(label="Output", lines=20)

+    # Update token visibility
+    inference_api.change(
+        fn=update_token_visibility,
+        inputs=[inference_api],
+        outputs=[openrouter_token]
+    )
+
+    # Update template content when preset is selected
+    prompt_format.change(
+        fn=update_prompt_template,
+        inputs=[prompt_format],
+        outputs=[custom_prompt]
+    )
+
+    # Update format dropdown when template is edited
+    custom_prompt.change(
+        fn=handle_template_edit,
+        inputs=[prompt_format, custom_prompt],
+        outputs=[prompt_format]
+    )
+
+    start_btn.click(
+        fn=gradio_run_evaluation,
+        inputs=[inference_api, model_name, prompt_format, openrouter_token, custom_prompt],
+        outputs=output
+    )

 demo.queue().launch()
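For reference, a minimal sketch of the preset/custom synchronisation that the two change handlers above implement, using made-up stand-in templates instead of the real PROMPT_TEMPLATES so it runs without the eval package, and assuming the preset handler returns the template text shown in the textarea:

# Stand-in templates; the real app derives these from PROMPT_FORMATTERS.
TEMPLATES = {
    "duckdbinst": "### Schema:\n{schema}\n### Question:\n{question}\n### SQL:",
    "duckdbinstgraniteshort": "{schema}\n\nQuestion: {question}\nSQL:",
}

def on_preset_selected(name: str) -> str:
    # Selecting a preset fills the textarea with that preset's template text.
    return TEMPLATES.get(name, "")

def on_template_edited(text: str) -> str:
    # If the edited text still matches a preset verbatim, keep that preset's
    # name in the dropdown; any other edit flips the dropdown to "custom".
    for name, template in TEMPLATES.items():
        if template.strip() == text.strip():
            return name
    return "custom"

assert on_template_edited(on_preset_selected("duckdbinst")) == "duckdbinst"
assert on_template_edited("SELECT 42;") == "custom"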
duckdb-nsql/eval/constants.py (CHANGED)

@@ -16,6 +16,7 @@ from prompt_formatters import (
     DuckDBInstFormatterGPTmini,
     DuckDBInstFormatterPhiAzure,
     DuckDBInstFormatterLlamaSyntax,
+    DuckDBInstFormatterCustom,
 )

 PROMPT_FORMATTERS = {
@@ -33,5 +34,6 @@ PROMPT_FORMATTERS = {
     "duckdbinstgptmini": DuckDBInstFormatterPhi,
     "duckdbinstphiazure": DuckDBInstFormatterPhiAzure,
     "duckdbinstllamabasic": DuckDBInstFormatterLlamaBasic,
-    "duckdbinstllamasyntax": DuckDBInstFormatterLlamaSyntax
+    "duckdbinstllamasyntax": DuckDBInstFormatterLlamaSyntax,
+    "custom": DuckDBInstFormatterCustom
 }
duckdb-nsql/eval/prompt_formatters.py (CHANGED)

@@ -958,6 +958,35 @@ Write a DuckDB SQL query for the given question!
         )
         return instruction

+
+class DuckDBInstFormatterCustom(RajkumarFormatter):
+    """DuckDB Inst class."""
+
+    PROMPT_TEMPLATE = ""
+
+    @classmethod
+    def format_retrieved_context(
+        cls,
+        context: list[str],
+    ) -> str:
+        """Format retrieved context."""
+        context_str = "\n--------\n".join(context)
+        return f"\n### Documentation:\n{context_str}\n"
+
+    @classmethod
+    def format_prompt(
+        cls,
+        instruction: str,
+        table_text: str,
+        context_text: str,
+    ) -> str | list[str]:
+        """Get prompt format."""
+        instruction = cls.PROMPT_TEMPLATE.format(
+            schema=table_text,
+            question=instruction
+        )
+        return instruction
+
 class DuckDBInstNoShorthandFormatter(DuckDBInstFormatter):
     """DuckDB Inst class."""

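A self-contained sketch of how the new formatter is meant to be driven once a template is supplied (as evaluation_logic.py now does for the "custom" format); the stand-in class below mirrors the committed one minus the RajkumarFormatter base, and the schema/question strings are invented:

class CustomFormatterSketch:
    """Stand-in for DuckDBInstFormatterCustom (base class omitted)."""

    PROMPT_TEMPLATE = ""  # assigned by the caller before formatting

    @classmethod
    def format_prompt(cls, instruction: str, table_text: str, context_text: str) -> str:
        # The template only consumes {schema} and {question}; context_text is unused here.
        return cls.PROMPT_TEMPLATE.format(schema=table_text, question=instruction)


CustomFormatterSketch.PROMPT_TEMPLATE = (
    "### Schema:\n{schema}\n\n### Question:\n{question}\n\n### DuckDB SQL:\n"
)
print(CustomFormatterSketch.format_prompt(
    instruction="how many rides were longer than 10 km?",
    table_text="CREATE TABLE rides (id INTEGER, distance_km DOUBLE);",
    context_text="",
))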
evaluation_logic.py (CHANGED)

@@ -54,7 +54,7 @@ def save_prediction(inference_api, model_name, prompt_format, question, generate
             "timestamp": datetime.now().isoformat()
         }, f)

-def save_evaluation(inference_api, model_name, prompt_format, metrics):
+def save_evaluation(inference_api, model_name, prompt_format, custom_prompt, metrics):
     evaluation_file = evaluation_folder / f"evaluation_{file_uuid}.json"
     evaluation_folder.mkdir(parents=True, exist_ok=True)

@@ -64,6 +64,7 @@ def save_evaluation(inference_api, model_name, prompt_format, metrics):
         "inference_api": inference_api,
         "model_name": model_name,
         "prompt_format": prompt_format,
+        "custom_prompt": str(custom_prompt),
         "timestamp": datetime.now().isoformat()
     }

@@ -82,7 +83,7 @@ def save_evaluation(inference_api, model_name, prompt_format, metrics):
         json.dump(flattened_metrics, f)
         f.write('\n')

-def run_prediction(inference_api, model_name, prompt_format, output_file):
+def run_prediction(inference_api, model_name, prompt_format, custom_prompt, output_file):
     dataset_path = str(eval_dir / "data/dev.json")
     table_meta_path = str(eval_dir / "data/tables.json")
     stop_tokens = [';']
@@ -100,7 +101,11 @@ def run_prediction(inference_api, model_name, prompt_format, output_file):
     try:
         # Initialize necessary components
         data_formatter = DefaultLoader()
+        if prompt_format.startswith("custom"):
+            prompt_formatter = PROMPT_FORMATTERS["custom"]()
+            prompt_formatter.PROMPT_TEMPLATE = custom_prompt
+        else:
+            prompt_formatter = PROMPT_FORMATTERS[prompt_format]()

         # Load manifest
         manifest = get_manifest(
@@ -159,7 +164,7 @@ def run_prediction(inference_api, model_name, prompt_format, output_file):
         yield f"Prediction failed with error: {str(e)}"
         yield f"Error traceback: {traceback.format_exc()}"

-def run_evaluation(inference_api, model_name, prompt_format="duckdbinstgraniteshort"):
+def run_evaluation(inference_api, model_name, prompt_format="duckdbinstgraniteshort", custom_prompt=None):
     if "OPENROUTER_API_KEY" not in os.environ:
         yield "Error: OPENROUTER_API_KEY not found in environment variables."
         return
@@ -176,6 +181,9 @@ def run_evaluation(inference_api, model_name, prompt_format="duckdbinstgranitesh
     yield f"Using model: {model_name}"
     yield f"Using prompt format: {prompt_format}"

+    if prompt_format == "custom":
+        prompt_format = prompt_format+"_"+str(abs(hash(custom_prompt)) % (10 ** 8))
+
     output_file = output_dir / f"{prompt_format}_0docs_{model_name.replace('/', '_')}_dev_{datetime.now().strftime('%y-%m-%d')}.json"

     # Ensure the output directory exists
@@ -186,7 +194,7 @@ def run_evaluation(inference_api, model_name, prompt_format="duckdbinstgranitesh
         yield "Skipping prediction step and proceeding to evaluation."
     else:
         # Run prediction
-        for output in run_prediction(inference_api, model_name, prompt_format, output_file):
+        for output in run_prediction(inference_api, model_name, prompt_format, custom_prompt, output_file):
             yield output

     # Run evaluation
@@ -226,7 +234,7 @@ def run_evaluation(inference_api, model_name, prompt_format="duckdbinstgranitesh
     )

     # Save evaluation results to dataset
-    save_evaluation(inference_api, model_name, prompt_format, metrics)
+    save_evaluation(inference_api, model_name, prompt_format, custom_prompt, metrics)

     yield "Evaluation completed."

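To make the run-labelling change concrete, a small sketch of how a custom template is folded into the prompt_format tag and the output file name; the directory, model, and template values are invented, and the hash suffix differs between interpreter runs because str hashes are salted unless PYTHONHASHSEED is fixed:

from datetime import datetime
from pathlib import Path

output_dir = Path("output")  # placeholder for the app's real output_dir
model_name = "qwen/qwen-2.5-72b-instruct"
prompt_format = "custom"
custom_prompt = "### Schema:\n{schema}\n### Question:\n{question}\n"

# Mirrors run_evaluation: tag custom runs with a short hash of the template
# so different templates land in different prediction files.
if prompt_format == "custom":
    prompt_format = prompt_format + "_" + str(abs(hash(custom_prompt)) % (10 ** 8))

output_file = output_dir / (
    f"{prompt_format}_0docs_{model_name.replace('/', '_')}_dev_"
    f"{datetime.now().strftime('%y-%m-%d')}.json"
)
print(output_file)  # e.g. output/custom_12345678_0docs_qwen_qwen-2.5-72b-instruct_dev_25-01-15.json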