Spaces:

qanta-challenge
/

quizbowl-submission

Running

File size: 7,329 Bytes

973519b

import time
from typing import Any, Iterable

from workflows.executors import execute_workflow
from workflows.structs import Workflow


def _get_workflow_response(workflow: Workflow, available_vars: dict[str, Any]) -> tuple[dict[str, Any], str, float]:
    """Execute a complete workflow and measure how long the call takes.

    Args:
        workflow: The workflow to execute.
        available_vars: Mapping of variable names to the values handed to the workflow.

    Returns:
        A tuple ``(response, content, response_time)``: the two values returned by
        ``execute_workflow(..., return_full_content=True)`` followed by the elapsed
        time of that call in seconds.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed (or go
    # negative) if the system wall clock is adjusted while the workflow is running.
    started_at = time.perf_counter()
    response, content = execute_workflow(workflow, available_vars, return_full_content=True)
    response_time = time.perf_counter() - started_at
    return response, content, response_time


class MultiStepTossupAgent:
    """Agent for handling tossup questions with multiple steps in the workflow."""

    # Name of the workflow input that receives the (partial) question text on each run
    external_input_variable = "question_text"
    # Outputs that `run` reads from every workflow response; the workflow must declare all of them
    output_variables = ["answer", "confidence"]

    def __init__(self, workflow: Workflow, buzz_threshold: float):
        """Initialize the multi-step tossup agent.

        Args:
            workflow: The workflow containing multiple steps
            buzz_threshold: Confidence threshold for buzzing

        Raises:
            ValueError: If the workflow does not declare the external input variable
                or any of the required output variables.
        """
        self.workflow = workflow
        self.buzz_threshold = buzz_threshold
        # The instance attribute mirrors whatever the workflow declares as outputs
        self.output_variables = list(workflow.outputs.keys())

        # Validate input variables
        if self.external_input_variable not in workflow.inputs:
            raise ValueError(f"External input variable {self.external_input_variable} not found in workflow inputs")

        # Validate output variables against the class-level requirement. Checking the
        # instance attribute here would compare the workflow's outputs with themselves
        # and could never fail.
        # NOTE(review): assumes the response keys mirror workflow.outputs — confirm against execute_workflow.
        for out_var in type(self).output_variables:
            if out_var not in workflow.outputs:
                raise ValueError(f"Output variable {out_var} not found in workflow outputs")

    def run(self, question_runs: list[str], early_stop: bool = True) -> Iterable[dict]:
        """Process a tossup question and decide when to buzz based on confidence.

        The workflow is executed once per entry of ``question_runs``, in order.

        Args:
            question_runs: Progressive reveals of the question text
            early_stop: Whether to stop after the first buzz

        Yields:
            Dict containing:
                - answer: The model's answer
                - confidence: Confidence score
                - buzz: Whether to buzz (confidence >= buzz_threshold)
                - question_fragment: Current question text
                - position: Current position in question (1-based)
                - full_response: Complete model response
                - response_time: Time taken for response
                - step_outputs: Outputs from each step (empty dict if the response has none)
        """
        for i, question_text in enumerate(question_runs):
            # Execute the complete workflow
            response, content, response_time = _get_workflow_response(
                self.workflow, {self.external_input_variable: question_text}
            )

            buzz = response["confidence"] >= self.buzz_threshold
            result = {
                "answer": response["answer"],
                "confidence": response["confidence"],
                "buzz": buzz,
                "question_fragment": question_text,
                "position": i + 1,
                "full_response": content,
                "response_time": response_time,
                "step_outputs": response.get("step_outputs", {}),  # Include intermediate step outputs
            }

            yield result

            # If we've reached the confidence threshold, buzz and stop
            if early_stop and buzz:
                return


class MultiStepBonusAgent:
    """Agent for handling bonus questions with multiple steps in the workflow."""

    # Names of the workflow inputs supplied by `run`
    external_input_variables = ["leadin", "part"]
    # Outputs that `run` reads from every workflow response; the workflow must declare all of them
    output_variables = ["answer", "confidence", "explanation"]

    def __init__(self, workflow: Workflow):
        """Initialize the multi-step bonus agent.

        Args:
            workflow: The workflow containing multiple steps

        Raises:
            ValueError: If the workflow does not declare every external input variable
                or any of the required output variables.
        """
        self.workflow = workflow
        # The instance attribute mirrors whatever the workflow declares as outputs
        self.output_variables = list(workflow.outputs.keys())

        # Validate input variables
        for input_var in self.external_input_variables:
            if input_var not in workflow.inputs:
                raise ValueError(f"External input variable {input_var} not found in workflow inputs")

        # Validate output variables against the class-level requirement. Checking the
        # instance attribute here would compare the workflow's outputs with themselves
        # and could never fail.
        # NOTE(review): assumes the response keys mirror workflow.outputs — confirm against execute_workflow.
        for out_var in type(self).output_variables:
            if out_var not in workflow.outputs:
                raise ValueError(f"Output variable {out_var} not found in workflow outputs")

    def run(self, leadin: str, part: str) -> dict:
        """Process a bonus part with the given leadin.

        Args:
            leadin: The leadin text for the bonus question
            part: The specific part text to answer

        Returns:
            Dict containing:
                - answer: The model's answer
                - confidence: Confidence score
                - explanation: Explanation for the answer
                - full_response: Complete model response
                - response_time: Time taken for response
                - step_outputs: Outputs from each step (empty dict if the response has none)
        """
        response, content, response_time = _get_workflow_response(
            self.workflow,
            {
                "leadin": leadin,
                "part": part,
            },
        )

        return {
            "answer": response["answer"],
            "confidence": response["confidence"],
            "explanation": response["explanation"],
            "full_response": content,
            "response_time": response_time,
            "step_outputs": response.get("step_outputs", {}),  # Include intermediate step outputs
        }


# Demo: run both agents against samples from the leaderboard dataset
if __name__ == "__main__":
    # These imports only execute when the file is run as a script
    from datasets import load_dataset

    from workflows.factory import create_quizbowl_bonus_workflow, create_quizbowl_tossup_workflow

    dataset = load_dataset("umdclip/leaderboard_co_set", split="train")

    # One agent per question format, each backed by its factory-built workflow
    tossup_agent = MultiStepTossupAgent(workflow=create_quizbowl_tossup_workflow(), buzz_threshold=0.9)
    bonus_agent = MultiStepBonusAgent(workflow=create_quizbowl_bonus_workflow())

    # --- Tossup: feed progressive reveals until the agent buzzes ---
    print("\n=== TOSSUP MODE EXAMPLE ===")
    tossup_sample = dataset[30]
    print(tossup_sample["question_runs"][-1])
    print(tossup_sample["gold_label"])
    print()

    for guess in tossup_agent.run(tossup_sample["question_runs"], early_stop=True):
        print(guess["full_response"])
        print(f"Guess at position {guess['position']}: {guess['answer']}")
        print(f"Confidence: {guess['confidence']}")
        print("Step outputs:", guess["step_outputs"])
        if guess["buzz"]:
            print("Buzzed!\n")

    # --- Bonus: answer every part of one bonus question ---
    print("\n=== BONUS MODE EXAMPLE ===")
    bonus_sample = dataset[31]  # Assuming this is a bonus question
    leadin_text = bonus_sample["leadin"]
    bonus_parts = bonus_sample["parts"]

    print(f"Leadin: {leadin_text}")
    for part_number, bonus_part in enumerate(bonus_parts, start=1):
        print(f"\nPart {part_number}: {bonus_part['part']}")
        outcome = bonus_agent.run(leadin_text, bonus_part["part"])
        print(f"Answer: {outcome['answer']}")
        print(f"Confidence: {outcome['confidence']}")
        print(f"Explanation: {outcome['explanation']}")
        print(f"Response time: {outcome['response_time']:.2f}s")
        print("Step outputs:", outcome["step_outputs"])