Maharshi Gor committed · Commit 02b7dec · Parent(s): 0bab47c
Adds quizbowl pipeline support for bonus and tossup questions
Implements new YAML configurations for bonus and tossup pipelines, enhancing the quizbowl application.
Introduces a two-step process for evaluating answers and confidence levels.
Updates UI components to facilitate pipeline selection and loading, improving user experience.
- examples/bonus/simple-bonus-pipeline.yaml +55 -0
- examples/tossup/simple-tossup-pipeline.yaml +36 -0
- examples/tossup/two-step-justified-confidence.yaml +49 -0
- src/app_configs.py +1 -0
- src/components/model_pipeline/tossup_pipeline.py +10 -8
- src/components/quizbowl/bonus.py +53 -49
- src/components/quizbowl/commons.py +2 -0
- src/components/quizbowl/plotting.py +5 -5
- src/components/quizbowl/populate.py +38 -0
- src/components/quizbowl/tossup.py +34 -31
- src/display/custom_css.py +59 -16
- src/display/guide.py +23 -7
- src/envs.py +3 -1
- src/submission/structs.py +5 -2
- src/submission/submit.py +32 -5
- src/workflows/structs.py +6 -0
examples/bonus/simple-bonus-pipeline.yaml
ADDED
```yaml
inputs:
  - leadin
  - part
outputs:
  answer: A.answer
  confidence: A.confidence
  explanation: A.explanation
steps:
  - id: A
    name: Bonus Agent
    model: gpt-4o-mini
    provider: OpenAI
    temperature: 0.3
    system_prompt: 'You are an expert quizbowl player specializing in answering bonus questions across various academic domains including history, literature, science, fine arts, and social sciences.

      Quizbowl bonus questions consist of a leadin paragraph followed by multiple parts. Each part tests specific knowledge within the topic introduced in the leadin.

      For each bonus part you receive:

      1. Carefully analyze both the leadin context and the specific part text
      2. Identify key clues, terms, and relationships mentioned
      3. Determine the most precise answer based on the specific wording and context
      4. Provide your answer in the standard accepted format (e.g., full name for people, complete titles for works)
      5. Assess your confidence on a scale from 0.0 (complete guess) to 1.0 (absolute certainty)
      6. Explain your reasoning process, citing specific clues from the question that led to your answer

      Remember that bonus questions often follow patterns:
      - Parts typically increase in difficulty from easy (15 points) to medium (10 points) to hard (5 points)
      - Later parts may build on information from earlier parts
      - Specific answer formats may be requested (e.g., "name the author", "identify the compound")

      Format your response precisely as:

      ANSWER: <your specific, concise answer>

      CONFIDENCE: <numerical value between 0.0 and 1.0>

      EXPLANATION: <detailed reasoning that connects specific clues to your answer, demonstrates your thought process, and justifies your confidence level>'
    input_fields:
      - name: question_leadin
        description: The introductory paragraph that establishes the topic and context for all parts of the bonus question
        variable: leadin
      - name: question_part
        description: The specific part text containing clues that should lead to a single answer
        variable: part
    output_fields:
      - name: answer
        description: The precise predicted answer that directly responds to what the question is asking for
        type: str
      - name: confidence
        type: float
        description: A numerical assessment (0.0-1.0) of certainty in the provided answer, where 0.0 indicates a complete guess and 1.0 indicates absolute certainty
      - name: explanation
        description: A detailed justification that connects specific clues from the question to the answer and explains the reasoning process
        type: str
```
examples/tossup/simple-tossup-pipeline.yaml
ADDED
```yaml
inputs:
  - question_text
outputs:
  answer: A.answer
  confidence: A.confidence
steps:
  - id: A
    name: Tossup Agent
    model: gpt-4o-mini
    provider: OpenAI
    temperature: 0.1
    system_prompt: |
      You are a professional quizbowl player answering tossup questions.
      Given a progressively revealed question text, provide your best guess at the answer and your confidence level.

      Your task:
      1. Analyze the clues provided in the question text
      2. Determine the most likely answer based on the information available
      3. Assess your confidence in your answer on a scale from 0.0 (complete guess) to 1.0 (absolute certainty)

      Keep your answer direct and concise, limited to a couple of words.
      Your confidence should reflect how certain you are based on the clues revealed so far.
    input_fields:
      - name: question
        description: The progressively revealed question text so far.
        variable: question_text
    output_fields:
      - name: answer
        description: Your best guess at the answer to the revealed question text.
        type: str
      - name: confidence
        type: float
        description: Your confidence in the answer, ranging from 0.0 (complete guess) to 1.0 (absolute certainty) in increments of 0.01.
buzzer:
  method: AND
  confidence_threshold: 0.8
```
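These pipeline YAMLs are parsed straight into the repo's pydantic workflow models (see `load_demo_example` in `src/submission/submit.py` below). A minimal loading sketch for the tossup example above, assuming `TossupWorkflow` exposes the buzzer settings as model attributes:

```python
# Minimal loading sketch, mirroring load_demo_example() further down;
# assumes TossupWorkflow exposes buzzer settings as model attributes.
import yaml

from workflows.structs import TossupWorkflow

with open("examples/tossup/simple-tossup-pipeline.yaml") as f:
    yaml_data = yaml.safe_load(f)

# Pydantic validation rejects malformed pipelines at load time
# (e.g., a confidence_threshold outside [0.0, 1.0]).
workflow = TossupWorkflow.model_validate(yaml_data)
print(workflow.buzzer.method, workflow.buzzer.confidence_threshold)  # AND 0.8
```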
examples/tossup/two-step-justified-confidence.yaml
ADDED
```yaml
inputs:
  - question_text
outputs:
  answer: A.answer
  confidence: B.confidence
  justification: B.justification
steps:
  - id: A
    name: Answer Generator
    model: gpt-4o-mini
    provider: OpenAI
    temperature: 0.1
    system_prompt: You are a professional quizbowl player answering tossup questions.
      Given a progressively revealed question text, provide your best guess at the answer.
      Keep your answer direct and concise, limited to a couple of words.
      Focus only on determining the correct answer based on the clues provided so far.
    input_fields:
      - name: question
        description: The progressively revealed question text so far.
        variable: question_text
    output_fields:
      - name: answer
        description: Your best guess at the answer to the revealed question text.
  - id: B
    name: Confidence Evaluator
    model: command-r-plus
    provider: Cohere
    temperature: 0.1
    system_prompt: You are a professional quizbowl player evaluating answer confidence.
      Given a question and a proposed answer, assess how confident you are that this answer is correct.
      Provide a confidence score from 0.0 (completely certain the answer is incorrect) to 1.0 (completely certain the answer is correct) in increments of 0.01.
      0.5 means you are highly uncertain about the correctness of the answer.
      Also provide a single-line justification explaining why you assigned this confidence level.
    input_fields:
      - name: question
        description: The progressively revealed question text so far.
        variable: question_text
      - name: proposed_answer
        description: The answer proposed by the first step.
        variable: A.answer
    output_fields:
      - name: confidence
        type: float
        description: Your confidence in the proposed answer, ranging from 0.0 to 1.0 in increments of 0.01.
      - name: justification
        description: A single-line explanation justifying your confidence score.
buzzer:
  method: AND
  confidence_threshold: 0.8
```
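In this two-step pipeline, step B consumes step A's output through the `variable: A.answer` reference, and the top-level `outputs` block maps pipeline outputs to `<step_id>.<field>` names. A hypothetical sketch of how such references could be resolved; the actual executor (`workflows/qb_agents.py`) is not part of this diff:

```python
# Hypothetical resolver for "<step_id>.<field>" references like A.answer;
# the real execution logic lives in workflows/qb_agents.py, not shown here.
def resolve_reference(ref: str, step_outputs: dict[str, dict]) -> object:
    step_id, field = ref.split(".", 1)
    return step_outputs[step_id][field]

# After step A runs, its outputs become available to later steps:
step_outputs = {"A": {"answer": "the Krebs cycle"}}
proposed_answer = resolve_reference("A.answer", step_outputs)  # input to step B
```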
src/app_configs.py
CHANGED
```diff
@@ -2,6 +2,7 @@ THEME = "gstaff/sketch"
 
 UNSELECTED_VAR_NAME = "Select Variable..."
 UNSELECTED_MODEL_NAME = "Select Model..."
+UNSELECTED_PIPELINE_NAME = "Select Pipeline to Import..."
 AVAILABLE_MODELS = {
     "OpenAI/gpt-4o": {
         "model": "gpt-4o-2024-11-20",
```
src/components/model_pipeline/tossup_pipeline.py
CHANGED
```diff
@@ -52,15 +52,13 @@ class TossupPipelineInterface(PipelineInterface):
         tokens_prob: float | None,
     ):
         """Update the buzzer."""
-        else:
-            log_prob_thresh = None
+
+        prob_threshold = float(tokens_prob) if tokens_prob and tokens_prob > 0 else None
         state.workflow.buzzer = state.workflow.buzzer.model_copy(
             update={
                 "method": method,
                 "confidence_threshold": confidence_threshold,
+                "prob_threshold": prob_threshold,
             }
         )
         Buzzer.model_validate(state.workflow.buzzer)
@@ -108,14 +106,17 @@ class TossupPipelineInterface(PipelineInterface):
         )
         dropdowns[output_field] = dropdown
         with gr.Row(elem_classes="output-fields-header"):
-            gr.Markdown(
+            gr.Markdown(
+                "#### Buzzer settings:\n Set your thresholds for confidence and output tokens probability."
+            )
         with gr.Row(elem_classes="control-panel"):
             self.confidence_slider = gr.Slider(
                 minimum=0.0,
                 maximum=1.0,
                 value=self.defaults.get("confidence_threshold", 0.85),
                 step=0.01,
-                label="Confidence
+                label="Confidence",
+                elem_classes="slider-container",
             )
             self.buzzer_method_dropdown = gr.Dropdown(
                 choices=["AND", "OR"],
@@ -127,10 +128,11 @@ class TossupPipelineInterface(PipelineInterface):
             )
             self.prob_slider = gr.Slider(
                 value=self.defaults.get("logits_prob", 0.0),
-                label="Probability
+                label="Probability",
                 minimum=0.0,
                 maximum=1.0,
                 step=0.001,
+                elem_classes="slider-container",
             )
 
     def update_choices(available_variables):
```
src/components/quizbowl/bonus.py
CHANGED
```diff
@@ -6,13 +6,14 @@ import pandas as pd
 from datasets import Dataset
 from loguru import logger
 
+from app_configs import UNSELECTED_PIPELINE_NAME
 from components.model_pipeline.model_pipeline import PipelineInterface, PipelineState, PipelineUIState
 from display.formatting import styled_error
 from submission import submit
 from workflows.qb_agents import QuizBowlBonusAgent
 from workflows.structs import ModelStep, Workflow
 
-from . import commons
+from . import commons, populate
 from .plotting import (
     create_bonus_confidence_plot,
     create_bonus_html,
@@ -120,10 +121,11 @@ class BonusInterface:
         self.output_state = gr.State(value="{}")
         self.render()
 
-    def
+    def _render_pipeline_interface(self, workflow: Workflow, simple: bool = True):
         """Render the model interface."""
-        with gr.Row():
-            self.
+        with gr.Row(elem_classes="bonus-header-row form-inline"):
+            self.pipeline_selector = commons.get_pipeline_selector([])
+            self.load_btn = gr.Button("⬇️ Import Pipeline", variant="secondary")
         self.pipeline_interface = PipelineInterface(
             workflow,
             simple=simple,
@@ -137,16 +139,13 @@ class BonusInterface:
         self.run_btn = gr.Button("Run on Bonus Question", variant="secondary")
 
         self.question_display = gr.HTML(label="Question", elem_id="bonus-question-display")
-
-        self.confidence_plot = gr.Plot(
-            label="Part Confidence",
-            format="webp",
-        )
-
+        self.error_display = gr.HTML(label="Error", elem_id="bonus-error-display", visible=False)
         self.results_table = gr.DataFrame(
             label="Model Outputs",
             value=pd.DataFrame(columns=["Part", "Correct?", "Confidence", "Prediction", "Explanation"]),
+            visible=False,
         )
+        self.model_outputs_display = gr.JSON(label="Model Outputs", value="{}", show_indices=True, visible=False)
 
         with gr.Row():
             self.eval_btn = gr.Button("Evaluate", variant="primary")
@@ -168,7 +167,7 @@ class BonusInterface:
         with gr.Row():
             # Model Panel
             with gr.Column(scale=1):
-                self.
+                self._render_pipeline_interface(workflow, simple=self.defaults["simple_workflow"])
 
             with gr.Column(scale=1):
                 self._render_qb_interface()
@@ -192,20 +191,6 @@ class BonusInterface:
         except Exception as e:
             return f"Error loading question: {str(e)}"
 
-    def get_user_submission_names(self, profile: gr.OAuthProfile | None) -> list[str]:
-        if profile is None:
-            logger.error("Authentication required. Please log in to view your submissions.")
-            return []
-        model_names = submit.get_user_submission_names("bonus", profile)
-        logger.info("Loaded model names: {model_names}")
-        return gr.update(choices=model_names, value=None)
-
-    def load_user_submission(self, model_name: str, profile: gr.OAuthProfile | None) -> PipelineState:
-        if profile is None:
-            return styled_error("Authentication required. Please log in to view your submissions.")
-        submission = submit.load_submission(model_name, "tossup", profile)
-        return PipelineState(workflow=submission.workflow, ui_state=PipelineUIState.from_workflow(submission.workflow))
-
     def get_model_outputs(self, example: dict, pipeline_state: PipelineState):
         """Get the model outputs for a given question ID."""
         outputs = []
@@ -224,6 +209,20 @@ class BonusInterface:
 
         return outputs
 
+    def get_pipeline_names(self, profile: gr.OAuthProfile | None) -> list[str]:
+        names = [UNSELECTED_PIPELINE_NAME] + populate.get_pipeline_names("bonus", profile)
+        return gr.update(choices=names, value=UNSELECTED_PIPELINE_NAME)
+
+    def load_pipeline(self, model_name: str, profile: gr.OAuthProfile | None) -> tuple[str, PipelineState]:
+        try:
+            pipeline_state = populate.load_pipeline("bonus", model_name, profile)
+            if pipeline_state is None:
+                return UNSELECTED_PIPELINE_NAME, gr.skip(), gr.update(visible=False)
+            return UNSELECTED_PIPELINE_NAME, pipeline_state, gr.update(visible=True)
+        except Exception as e:
+            error_msg = styled_error(f"Error loading pipeline: {str(e)}")
+            return UNSELECTED_PIPELINE_NAME, gr.skip(), gr.update(visible=True, value=error_msg)
+
     def single_run(
         self,
         question_id: int,
@@ -242,18 +241,26 @@ class BonusInterface:
             # Process results and prepare visualization data
             html_content, plot_data, output_state = initialize_eval_interface(example, outputs)
             df = process_bonus_results(outputs)
+            step_outputs = [output["step_outputs"] for output in outputs]
 
             return (
                 html_content,
-                gr.update(value=plot_data, label=f"Part Confidence on Question {question_id + 1}"),
                 gr.update(value=output_state),
-                gr.update(value=df, label=f"Model Outputs for Question {question_id + 1}"),
+                gr.update(value=df, label=f"Model Outputs for Question {question_id + 1}", visible=True),
+                gr.update(value=step_outputs, label=f"Step Outputs for Question {question_id + 1}", visible=True),
+                gr.update(visible=False),
             )
         except Exception as e:
             import traceback
 
             error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
-            return
+            return (
+                gr.skip(),
+                gr.skip(),
+                gr.update(visible=False),
+                gr.update(visible=False),
+                gr.update(visible=True, value=error_msg),
+            )
 
     def evaluate(self, pipeline_state: PipelineState, progress: gr.Progress = gr.Progress()):
         """Evaluate the bonus questions."""
@@ -288,14 +295,15 @@ class BonusInterface:
                 ]
             )
 
-            plot_data = create_scatter_pyplot(part_numbers, part_scores)
+            # plot_data = create_scatter_pyplot(part_numbers, part_scores)
             return (
                 gr.update(value=df, label="Scores on Sample Set"),
-                gr.update(
+                gr.update(visible=False),
             )
         except Exception as e:
+            error_msg = styled_error(f"Error evaluating bonus: {e.args}")
             logger.exception(f"Error evaluating bonus: {e.args}")
-            return
+            return gr.skip(), gr.update(visible=True, value=error_msg)
 
     def submit_model(
         self, model_name: str, description: str, pipeline_state: PipelineState, profile: gr.OAuthProfile = None
@@ -315,19 +323,19 @@ class BonusInterface:
 
         gr.on(
             triggers=[self.app.load],
-            fn=self.
-            outputs=[self.
+            fn=self.get_pipeline_names,
+            outputs=[self.pipeline_selector],
         )
 
+        self.new_loaded_pipeline_state = gr.State(value=None)
+        self.load_btn.click(
+            fn=self.load_pipeline,
+            inputs=[self.pipeline_selector],
+            outputs=[self.pipeline_selector, self.new_loaded_pipeline_state, self.error_display],
+        )
+        self.pipeline_interface.add_triggers_for_pipeline_export(
+            [self.new_loaded_pipeline_state.change], self.new_loaded_pipeline_state
+        )
 
         self.run_btn.click(
             self.pipeline_interface.validate_workflow,
@@ -341,16 +349,17 @@ class BonusInterface:
             ],
             outputs=[
                 self.question_display,
-                self.confidence_plot,
                 self.output_state,
                 self.results_table,
+                self.model_outputs_display,
+                self.error_display,
             ],
         )
 
         self.eval_btn.click(
             fn=self.evaluate,
             inputs=[self.pipeline_interface.pipeline_state],
-            outputs=[self.results_table, self.
+            outputs=[self.results_table, self.error_display],
         )
 
         self.submit_btn.click(
@@ -362,8 +371,3 @@ class BonusInterface:
             ],
             outputs=[self.submit_status],
         )
-        self.hidden_input.change(
-            fn=update_tossup_plot,
-            inputs=[self.hidden_input, self.output_state],
-            outputs=[self.confidence_plot],
-        )
```
src/components/quizbowl/commons.py
CHANGED
```diff
@@ -21,4 +21,6 @@ def get_pipeline_selector(model_options: list[str]):
         choices=model_options,
         value="",
         interactive=True,
+        container=False,
+        elem_classes="pipeline-selector",
     )
```
src/components/quizbowl/plotting.py
CHANGED
```diff
@@ -46,11 +46,11 @@ def _create_token_tooltip_html(values) -> str:
 
     return f"""
     <div class="tooltip card" style="background-color: {color}; border-radius: 8px; padding: 12px; box-shadow: 2px 4px 8px rgba(0, 0, 0, 0.15);">
-        <div class="tooltip-content" style="font-family: 'Arial', sans-serif; color: #
-            <h4 style="margin: 0 0 8px;">💡 Answer</h4>
-            <p style="font-weight: bold; margin: 0 0 8px;">{answer}</p>
-            <p style="margin: 0 0 4px;">📊 <
-            <p style="margin: 0;">🔍 <
+        <div class="tooltip-content" style="font-family: 'Arial', sans-serif; color: #000;">
+            <h4 style="margin: 0 0 8px; color: #000;">💡 Answer</h4>
+            <p style="font-weight: bold; margin: 0 0 8px; color: #000;">{answer}</p>
+            <p style="margin: 0 0 4px; color: #000;">📊 <b style="color: #000;">Confidence:</b> {confidence:.2f}</p>
+            <p style="margin: 0; color: #000;">🔍 <b style="color: #000;">Status:</b> {"✅ Correct" if score else "❌ Incorrect" if buzz else "🚫 No Buzz"}</p>
         </div>
     </div>
     """
```
src/components/quizbowl/populate.py
ADDED
```python
from typing import Optional

import gradio as gr
from loguru import logger

from app_configs import UNSELECTED_PIPELINE_NAME
from components.model_pipeline.model_pipeline import PipelineState, PipelineUIState
from display.formatting import styled_error
from submission import submit


def get_user_submission_names(profile: gr.OAuthProfile | None) -> list[str]:
    if profile is None:
        logger.error("Authentication required. Please log in to view your submissions.")
        return []
    return submit.get_user_submission_names("tossup", profile)


def get_pipeline_names(competition_type: str, profile: gr.OAuthProfile | None) -> list[str]:
    demo_example_names = submit.get_demo_example_submissions(competition_type)
    user_model_names = submit.get_user_submission_names(competition_type, profile)
    all_names = demo_example_names + user_model_names
    logger.info(f"Loaded model names: {all_names}")
    return all_names


def load_pipeline(competition_type: str, model_name: str, profile: gr.OAuthProfile | None) -> Optional[PipelineState]:
    if not model_name or model_name == UNSELECTED_PIPELINE_NAME:
        return None
    username, model_name = model_name.split("/")
    if username == "umdclip":
        workflow = submit.load_demo_example(model_name, competition_type)
    elif profile is not None:
        submission = submit.load_submission(model_name, competition_type, profile)
        workflow = submission.workflow
    else:
        raise gr.Error("Authentication required. Please log in to view your submissions.")
    return PipelineState(workflow=workflow, ui_state=PipelineUIState.from_workflow(workflow))
```
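`load_pipeline` expects fully qualified names of the form `<username>/<model_name>`; the reserved `umdclip` namespace routes to the bundled demo examples, while any other name requires a logged-in user. A usage sketch, where `profile` stands for the `gr.OAuthProfile` that Gradio injects for logged-in users:

```python
# Usage sketch for populate.load_pipeline; profile is the gr.OAuthProfile
# Gradio injects for a logged-in user, or None otherwise.
from components.quizbowl import populate

# Demo pipelines need no login: the "umdclip" namespace maps to examples/.
state = populate.load_pipeline("tossup", "umdclip/simple-tossup-pipeline", profile=None)

# User submissions need authentication; without it, gr.Error is raised.
# state = populate.load_pipeline("bonus", "some-user/my-bonus-model", profile)
```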
src/components/quizbowl/tossup.py
CHANGED
```diff
@@ -7,6 +7,7 @@ import pandas as pd
 from datasets import Dataset
 from loguru import logger
 
+from app_configs import UNSELECTED_PIPELINE_NAME
 from components.model_pipeline.model_pipeline import PipelineInterface, PipelineState, PipelineUIState
 from components.model_pipeline.tossup_pipeline import TossupPipelineInterface, TossupPipelineState
 from display.formatting import styled_error
@@ -14,7 +15,7 @@ from submission import submit
 from workflows.qb_agents import QuizBowlTossupAgent, TossupResult
 from workflows.structs import ModelStep, TossupWorkflow
 
-from . import commons
+from . import commons, populate
 from .plotting import (
     create_scatter_pyplot,
     create_tossup_confidence_pyplot,
@@ -190,8 +191,9 @@ class TossupInterface:
 
     def _render_pipeline_interface(self, workflow: TossupWorkflow, simple: bool = True):
         """Render the model interface."""
-        with gr.Row():
-            self.
+        with gr.Row(elem_classes="bonus-header-row form-inline"):
+            self.pipeline_selector = commons.get_pipeline_selector([])
+            self.load_btn = gr.Button("⬇️ Import Pipeline", variant="secondary")
         self.pipeline_interface = TossupPipelineInterface(
             workflow,
             simple=simple,
@@ -217,10 +219,11 @@ class TossupInterface:
             label="Buzz Confidence",
             format="webp",
         )
-        self.model_outputs_display = gr.JSON(label="Model Outputs", value="{}", visible=False)
+        self.model_outputs_display = gr.JSON(label="Model Outputs", value="{}", show_indices=True, visible=False)
         self.results_table = gr.DataFrame(
             label="Model Outputs",
             value=pd.DataFrame(columns=["Token Position", "Correct?", "Confidence", "Prediction"]),
+            visible=False,
         )
         with gr.Row():
             self.eval_btn = gr.Button("Evaluate", variant="primary")
@@ -285,19 +288,19 @@ class TossupInterface:
         outputs = add_model_scores(outputs, example["clean_answers"], example["run_indices"])
         return outputs
 
-    def get_user_submission_names(self, profile: gr.OAuthProfile | None) -> list[str]:
-        if profile is None:
-            return []
-        model_names = submit.get_user_submission_names("tossup", profile)
-        logger.info("Loaded model names: {model_names}")
-        return gr.update(choices=model_names, value=None)
+    def get_pipeline_names(self, profile: gr.OAuthProfile | None) -> list[str]:
+        names = [UNSELECTED_PIPELINE_NAME] + populate.get_pipeline_names("tossup", profile)
+        return gr.update(choices=names, value=UNSELECTED_PIPELINE_NAME)
 
-    def load_user_submission(self, model_name: str, profile: gr.OAuthProfile | None) -> PipelineState:
-        if profile is None:
-            return styled_error("Authentication required. Please log in to view your submissions.")
-        submission = submit.load_submission(model_name, "tossup", profile)
-        return PipelineState(workflow=submission.workflow, ui_state=PipelineUIState.from_workflow(submission.workflow))
+    def load_pipeline(self, model_name: str, profile: gr.OAuthProfile | None) -> tuple[str, PipelineState]:
+        try:
+            pipeline_state = populate.load_pipeline("tossup", model_name, profile)
+            if pipeline_state is None:
+                return UNSELECTED_PIPELINE_NAME, gr.skip(), gr.update(visible=False)
+            return UNSELECTED_PIPELINE_NAME, pipeline_state, gr.update(visible=True)
+        except Exception as e:
+            error_msg = styled_error(f"Error loading pipeline: {str(e)}")
+            return UNSELECTED_PIPELINE_NAME, gr.skip(), gr.update(visible=True, value=error_msg)
 
     def single_run(
         self,
@@ -322,7 +325,7 @@ class TossupInterface:
                 tokens_html,
                 gr.update(value=output_state),
                 gr.update(value=plot_data, label=f"Buzz Confidence on Question {question_id + 1}"),
-                gr.update(value=df, label=f"Model Outputs for Question {question_id + 1}"),
+                gr.update(value=df, label=f"Model Outputs for Question {question_id + 1}", visible=True),
                 gr.update(value=step_outputs, label=f"Step Outputs for Question {question_id + 1}", visible=True),
                 gr.update(visible=False),
             )
@@ -334,7 +337,7 @@ class TossupInterface:
                 gr.skip(),
                 gr.skip(),
                 gr.skip(),
-                gr.
+                gr.update(visible=False),
                 gr.update(visible=False),
                 gr.update(visible=True, value=error_msg),
             )
@@ -371,7 +374,7 @@ class TossupInterface:
             plot_data = create_scatter_pyplot(token_positions, correctness)
             return (
                 gr.update(value=plot_data, label="Buzz Positions on Sample Set"),
-                gr.update(value=df, label="Scores on Sample Set"),
+                gr.update(value=df, label="Scores on Sample Set", visible=True),
                 gr.update(visible=False),
             )
         except Exception as e:
@@ -380,7 +383,7 @@ class TossupInterface:
             logger.exception(f"Error evaluating tossups: {e.args}")
             return (
                 gr.skip(),
-                gr.
+                gr.update(visible=False),
                 gr.update(visible=True, value=styled_error(f"Error: {str(e)}\n{traceback.format_exc()}")),
             )
 
@@ -400,19 +403,19 @@ class TossupInterface:
 
         gr.on(
             triggers=[self.app.load],
-            fn=self.
-            outputs=[self.
+            fn=self.get_pipeline_names,
+            outputs=[self.pipeline_selector],
        )
 
+        self.new_loaded_pipeline_state = gr.State(value=None)
+        self.load_btn.click(
+            fn=self.load_pipeline,
+            inputs=[self.pipeline_selector],
+            outputs=[self.pipeline_selector, self.new_loaded_pipeline_state, self.error_display],
+        )
+        self.pipeline_interface.add_triggers_for_pipeline_export(
+            [self.new_loaded_pipeline_state.change], self.new_loaded_pipeline_state
+        )
 
         self.run_btn.click(
             self.pipeline_interface.validate_workflow,
```
src/display/custom_css.py
CHANGED
```diff
@@ -1,19 +1,22 @@
 css_pipeline = """
 :root {
     color-scheme: light !important;
-    --block-border-width: 0;
-    --section-header-text-weight: 600;
-    --section-header-text-size: 14px;
-    --input-radius: var(--radius-xl);
-    --font-mono: "Space Mono", monospace;
-    --text-
-    --text-
+    --block-border-width: 0 !important;
+    --section-header-text-weight: 600 !important;
+    --section-header-text-size: 14px !important;
+    --input-radius: var(--radius-xl) !important;
+    --font-mono: "Space Mono", monospace !important;
+    --text-sm: 12px !important;
+    --text-md: 14px !important;
+    --text-lg: 16px !important;
+    --input-text-size: var(--text-sm) !important;
     --body-text-size: 14px !important;
     --input-background-fill-focus: var(--secondary-300) !important;
 
     // Button Colors
     --button-primary-background-fill: var(--primary-800) !important;
     --button-secondary-background-fill: var(--secondary-600) !important;
+    --checkbox-label-text-color: var(--body-text-color) !important;
 
 
     --card-bg-color: #fcecd4;
@@ -23,14 +26,25 @@ css_pipeline = """
     --hover-border-color: #121212;
 }
 
-.dark {
-    --
-    --
+:root .dark {
+    color-scheme: dark !important;
+    --block-border-width: 0 !important;
+    --section-header-text-weight: 600 !important;
+    --section-header-text-size: 14px !important;
+    --input-radius: var(--radius-xl) !important;
+    --font-mono: "Space Mono", monospace !important;
+    --text-sm: 12px !important;
+    --text-md: 14px !important;
+    --text-lg: 16px !important;
+    --input-text-size: var(--text-sm) !important;
+    --body-text-size: 14px !important;
+
+    --button-primary-background-fill: var(--neutral-100) !important;
+    --button-secondary-background-fill: var(--secondary-300) !important;
     --button-primary-text-color: black !important;
-    --button-secondary-text-color: black !important
+    --button-secondary-text-color: black !important;
+    --checkbox-label-text-color: var(--body-text-color) !important;
 
-    --block-border-width: 0;
     --card-bg-color: #383127;
     --answer-bg-color: #1a2b3c;
     --hover-border-color: #ffffff;
@@ -44,10 +58,32 @@ css_pipeline = """
     box-shadow: 0 0 0 0 !important;
 }
 
+.slider-container .wrap {
+    gap: var(--spacing-md) !important;
+}
+
+.json-node {
+    /* On a light background (usually white), use darker and vivid colors */
+    font-size: var(--text-sm) !important;
+    --text-color: #2e2e2e; /* Dark grey text for overall readability */
+    --key-color: #d73a49; /* Bright red for keys */
+    --string-color: #22863a; /* Bold green for strings */
+    --number-color: #0366d6; /* Vivid blue for numbers */
+    --bracket-color: #6f42c1; /* Distinct purple for regular brackets */
+    --square-bracket-color: #e36209; /* Eye-popping orange for square brackets */
+    --punctuation-color: #17a2b8; /* Turquoise punctuation */
+    --line-number-color: #6a737d; /* Used for line numbers if shown */
+    --separator-color: var(--line-number-color);
+}
+
 .head {
     margin-bottom: 0px;
 }
 
+.icon-wrap {
+    right: var(--size-1) !important;
+}
+
 .gradio-container {
     max-width: 1500px;
     margin: 0 auto;
@@ -85,6 +121,14 @@ css_pipeline = """
     font-size: 12px;
 }
 
+.control-panel {
+    gap: var(--spacing-lg) !important;
+}
+
+.toggleable {
+    gap: var(--spacing-xs) !important;
+}
+
 .output-fields-panel {
     background-color: var(--card-bg-color);
     border: 0px solid #e0e0e0 !important;
@@ -143,13 +187,12 @@ css_pipeline = """
 }
 
 .model-dropdown input {
-    font-size: 14px;
     padding-bottom: 2px;
     padding-top: 2px;
 }
 
 .step-name input {
-    font-size:
+    font-size: var(--text-md);
     font-weight: bold;
     padding-bottom: 8px;
     margin-bottom: 4px;
@@ -421,7 +464,7 @@ css_tossup = """
     border-color: #228b22; /* Darker and slightly muted green */
 }
 .tossup-question {
-    line-height: 1.
+    line-height: 1.5;
     padding: 5px;
     margin-left: 4px;
     margin-right: 4px;
```
src/display/guide.py
CHANGED
```diff
@@ -12,7 +12,7 @@ GUIDE_MARKDOWN = """
 ## Competition Rules
 
 ### 🧠 Tossup Questions
-- **Format**: Individual questions
+- **Format**: Individual questions progressively revealed. Questions get easier as they are revealed.
 - **Scoring**:
   - Correct early buzz: +10 points
   - Incorrect early buzz: -5 points
@@ -20,10 +20,16 @@ GUIDE_MARKDOWN = """
 - **Required Outputs**:
   - `answer`: Your predicted answer
   - `confidence`: Score between 0-1
-  -
+  - `buzzer`: When to attempt answering
+    - Configure with confidence threshold (0.0-1.0)
+    - Optional token probability threshold for more control
+    - Combine thresholds using AND/OR logic (buzz when both/either condition met)
 
 ### 🎁 Bonus Questions
-- **Format**:
+- **Format**:
+  - Consists of a `leadin` paragraph that introduces the topic
+  - Followed by three related `parts` (A, B, C) that test specific knowledge
+  - Each part is worth 10 points
 - **Scoring**: +10 points per correct part (max 30)
 - **Required Outputs**:
   - `answer`: Your predicted answer
@@ -40,7 +46,17 @@ GUIDE_MARKDOWN = """
   - System prompt
   - Required outputs
 
-### 2.
+### 2. Using Demo Pipelines
+- Load existing demo pipelines as starting points
+- Modify configurations:
+  - Adjust model parameters
+  - Update system prompts
+  - Change confidence thresholds
+  - Add/remove pipeline steps
+- Save modified versions as new pipelines
+- Test changes incrementally
+
+### 3. Testing Your Pipeline
 1. Select an example question
 2. For Tossup:
    - Set buzz threshold (0.5-1.0)
@@ -50,15 +66,15 @@ GUIDE_MARKDOWN = """
    - Confidence scores
    - Performance metrics
 
-###
+### 4. Evaluation
 - Test on multiple questions
 - Monitor:
   - Accuracy
   - Confidence patterns
   - Response times
 
-###
-1. Log in
+### 5. Submission
+1. Log in to Hugging Face
 2. Name your model
 3. Add description
 4. Submit for evaluation
```
src/envs.py
CHANGED
```diff
@@ -14,10 +14,12 @@ OWNER = (
 )
 # ----------------------------------
 
-REPO_ID = f"{OWNER}/
+REPO_ID = f"{OWNER}/quizbowl-submission"
 QUEUE_REPO = f"{OWNER}/advcal-requests"
 RESULTS_REPO = f"{OWNER}/advcal-results"
 
+EXAMPLES_PATH = "examples"
+
 PLAYGROUND_DATASET_NAMES = {
     "tossup": "umdclip/acf-co24-tossups",
     "bonus": "umdclip/acf-co24-bonuses",
```
src/submission/structs.py
CHANGED
```diff
@@ -3,7 +3,7 @@ from typing import Dict, List, Literal, Optional
 
 from pydantic import BaseModel, Field
 
-from workflows.structs import Workflow
+from workflows.structs import TossupWorkflow, Workflow
 
 CompetitionType = Literal["tossup", "bonus"]
 SubmissionType = Literal["python_file", "simple_workflow", "complex_workflow"]
@@ -54,5 +54,8 @@ class Submission(BaseModel):
     def from_dict(cls, data: Dict) -> "Submission":
         """Create instance from dictionary format used in HF datasets"""
         if data.get("workflow"):
-            data["
+            if data["competition_type"] == "tossup":
+                data["workflow"] = TossupWorkflow.model_validate(data["workflow"])
+            else:
+                data["workflow"] = Workflow.model_validate(data["workflow"])
         return cls.model_validate(data)
```
src/submission/submit.py
CHANGED
```diff
@@ -1,3 +1,4 @@
+import glob
 import json
 import logging
 import os
@@ -5,12 +6,14 @@ import traceback
 from datetime import datetime, timedelta, timezone
 
 import gradio as gr
+import yaml
+from loguru import logger
 
 from app_configs import DAILY_SUBMISSION_LIMIT_PER_USER
 from display.formatting import styled_error, styled_message
-from envs import API, EVAL_REQUESTS_PATH, QUEUE_REPO
+from envs import API, EVAL_REQUESTS_PATH, EXAMPLES_PATH, QUEUE_REPO
 from submission.structs import CompetitionType, Submission, SubmissionStatus
-from workflows.structs import Workflow
+from workflows.structs import TossupWorkflow, Workflow
 
 
 def get_user_submissions(username: str, competition_type: str, pattern: str = None) -> list[Submission]:
@@ -24,18 +27,28 @@ def get_user_submissions(username: str, competition_type: str, pattern: str = None) -> list[Submission]:
             continue
         if pattern is not None and pattern not in file:
             continue
-        with open(os.path.join(out_dir, file), "r") as f:
-            submission = Submission.from_dict(json.load(f))
+        try:
+            with open(os.path.join(out_dir, file), "r") as f:
+                submission = Submission.from_dict(json.load(f))
             submissions.append(submission)
+        except Exception as e:
+            logger.error(f"Error loading submission {file}: {e}")
     return submissions
 
 
 def get_user_submission_names(competition_type: str, profile: gr.OAuthProfile | None) -> list[str]:
     """Get all submission model names for a user."""
     if profile is None:
+        logger.warning("No user profile provided. Returning empty list.")
         return []
     submissions = get_user_submissions(profile.username, competition_type)
-    return [s.model_name for s in submissions]
+    return [f"{s.username}/{s.model_name}" for s in submissions]
+
+
+def get_demo_example_submissions(competition_type: str) -> list[str]:
+    """Get all submissions for a demo example."""
+    examples_dir = f"{EXAMPLES_PATH}/{competition_type}"
+    return [f"umdclip/{os.path.basename(f).removesuffix('.yaml')}" for f in glob.glob(f"{examples_dir}/*.yaml")]
 
 
 def get_user_submissions_today(username: str, competition_type: str) -> list[Submission]:
@@ -170,6 +183,20 @@ def submit_model(
         return styled_error(f"Error submitting model: {str(e)}")
 
 
+def load_demo_example(model_name: str, competition_type: CompetitionType) -> Workflow:
+    """Load a demo example submission."""
+    examples_dir = f"{EXAMPLES_PATH}/{competition_type}"
+    filepath = f"{examples_dir}/{model_name}.yaml"
+    if not os.path.exists(filepath):
+        raise ValueError(f"Demo example file {filepath} not found")
+    with open(filepath, "r") as f:
+        yaml_data = yaml.safe_load(f)
+    if competition_type == "tossup":
+        return TossupWorkflow.model_validate(yaml_data)
+    else:
+        return Workflow.model_validate(yaml_data)
+
+
 def load_submission(model_name: str, competition_type: CompetitionType, profile: gr.OAuthProfile | None) -> Submission:
     if profile is None:
         logging.error("Authentication required. Please log in to view your submissions.")
```
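`get_demo_example_submissions` and `load_demo_example` agree on a simple naming convention: the dropdown entry `umdclip/<stem>` corresponds to `examples/<competition_type>/<stem>.yaml`. A sketch of the round trip:

```python
# Round trip between the two new demo-example helpers:
# list entries as "umdclip/<stem>", then load a stem back from examples/.
names = get_demo_example_submissions("tossup")
# e.g. ["umdclip/simple-tossup-pipeline", "umdclip/two-step-justified-confidence"]

stem = names[0].split("/", 1)[1]              # "simple-tossup-pipeline"
workflow = load_demo_example(stem, "tossup")  # validates the YAML shown earlier
```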
src/workflows/structs.py
CHANGED
```diff
@@ -107,6 +107,9 @@ class ModelStep(BaseModel):
     input_fields: list[InputField]
     output_fields: list[OutputField]
 
+    class Config:
+        use_enum_values = True
+
     def fields(self, field_type: FieldType) -> list[InputField | OutputField]:
         return self.input_fields if field_type == "input" else self.output_fields
 
@@ -252,6 +255,9 @@ class Buzzer(BaseModel):
     confidence_threshold: float = Field(default=0.8, ge=0.0, le=1.0)  # Minimum confidence to trigger a buzz
     prob_threshold: float | None = None  # Optional log probability threshold
 
+    class Config:
+        use_enum_values = True
+
     def run(self, confidence: float, prob: float | None = None, logprob: float | None = None) -> bool:
         """Run the buzzer logic."""
         if logprob is not None and prob is not None:
```