quizbowl-submission / src /workflows /qb /multi_step_agent.py
Maharshi Gor
Enhance model provider detection and add repository management script. Added support for multi step agent.
973519b
raw
history blame
7.33 kB
import time
from typing import Any, Iterable
from workflows.executors import execute_workflow
from workflows.structs import Workflow
def _get_workflow_response(workflow: Workflow, available_vars: dict[str, Any]) -> tuple[dict[str, Any], str, float]:
    """Execute a complete workflow and measure how long the call takes.

    Args:
        workflow: The workflow to execute.
        available_vars: Input variable values handed to the workflow.

    Returns:
        A ``(response, content, response_time)`` tuple: the two values returned
        by ``execute_workflow`` (called with ``return_full_content=True``)
        followed by the elapsed time in seconds.
    """
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or even go negative) if the system clock is adjusted during the call.
    start_time = time.perf_counter()
    response, content = execute_workflow(workflow, available_vars, return_full_content=True)
    response_time = time.perf_counter() - start_time
    return response, content, response_time
class MultiStepTossupAgent:
    """Agent for handling tossup questions with multiple steps in the workflow.

    Each progressively longer question fragment is run through the workflow,
    and a buzz is flagged once the reported confidence reaches the threshold.
    """

    # Name of the workflow input that receives the question text.
    external_input_variable = "question_text"
    # Workflow outputs that ``run`` reads from every response.
    output_variables = ["answer", "confidence"]

    def __init__(self, workflow: Workflow, buzz_threshold: float):
        """Initialize the multi-step tossup agent.

        Args:
            workflow: The workflow containing multiple steps
            buzz_threshold: Confidence threshold for buzzing

        Raises:
            ValueError: If the workflow lacks the ``question_text`` input or
                any of the outputs required by this agent.
        """
        self.workflow = workflow
        self.buzz_threshold = buzz_threshold

        # Validate input variables
        if self.external_input_variable not in workflow.inputs:
            raise ValueError(f"External input variable {self.external_input_variable} not found in workflow inputs")

        # Validate output variables. The check must run against the class-level
        # list of required outputs *before* the instance attribute below shadows
        # it; checking the workflow's own output names against themselves would
        # never fail.
        for out_var in type(self).output_variables:
            if out_var not in workflow.outputs:
                raise ValueError(f"Output variable {out_var} not found in workflow outputs")

        # Expose every output the workflow declares, not only the required ones.
        self.output_variables = list(workflow.outputs.keys())

    def run(self, question_runs: list[str], early_stop: bool = True) -> Iterable[dict]:
        """Process a tossup question and decide when to buzz based on confidence.

        Args:
            question_runs: Progressive reveals of the question text
            early_stop: Whether to stop after the first buzz

        Yields:
            Dict containing:
                - answer: The model's answer
                - confidence: Confidence score
                - buzz: Whether to buzz
                - question_fragment: Current question text
                - position: Current position in question (1-based)
                - full_response: Complete model response
                - response_time: Time taken for response
                - step_outputs: Outputs from each step (empty dict if absent)
        """
        for i, question_text in enumerate(question_runs):
            # Execute the complete workflow on the current fragment
            response, content, response_time = _get_workflow_response(
                self.workflow, {self.external_input_variable: question_text}
            )
            confidence = response["confidence"]
            buzz = confidence >= self.buzz_threshold
            yield {
                "answer": response["answer"],
                "confidence": confidence,
                "buzz": buzz,
                "question_fragment": question_text,
                "position": i + 1,
                "full_response": content,
                "response_time": response_time,
                "step_outputs": response.get("step_outputs", {}),  # Include intermediate step outputs
            }

            # If we've reached the confidence threshold, buzz and stop
            if early_stop and buzz:
                return
class MultiStepBonusAgent:
    """Agent for handling bonus questions with multiple steps in the workflow."""

    # Names of the workflow inputs that receive the leadin and the part text.
    external_input_variables = ["leadin", "part"]
    # Workflow outputs that ``run`` reads from every response.
    output_variables = ["answer", "confidence", "explanation"]

    def __init__(self, workflow: Workflow):
        """Initialize the multi-step bonus agent.

        Args:
            workflow: The workflow containing multiple steps

        Raises:
            ValueError: If the workflow lacks any of the required inputs
                (``leadin``, ``part``) or any of the outputs required by
                this agent.
        """
        self.workflow = workflow

        # Validate input variables
        for input_var in self.external_input_variables:
            if input_var not in workflow.inputs:
                raise ValueError(f"External input variable {input_var} not found in workflow inputs")

        # Validate output variables. The check must run against the class-level
        # list of required outputs *before* the instance attribute below shadows
        # it; checking the workflow's own output names against themselves would
        # never fail.
        for out_var in type(self).output_variables:
            if out_var not in workflow.outputs:
                raise ValueError(f"Output variable {out_var} not found in workflow outputs")

        # Expose every output the workflow declares, not only the required ones.
        self.output_variables = list(workflow.outputs.keys())

    def run(self, leadin: str, part: str) -> dict:
        """Process a bonus part with the given leadin.

        Args:
            leadin: The leadin text for the bonus question
            part: The specific part text to answer

        Returns:
            Dict containing:
                - answer: The model's answer
                - confidence: Confidence score
                - explanation: Explanation for the answer
                - full_response: Complete model response
                - response_time: Time taken for response
                - step_outputs: Outputs from each step (empty dict if absent)
        """
        response, content, response_time = _get_workflow_response(
            self.workflow,
            {
                "leadin": leadin,
                "part": part,
            },
        )
        return {
            "answer": response["answer"],
            "confidence": response["confidence"],
            "explanation": response["explanation"],
            "full_response": content,
            "response_time": response_time,
            "step_outputs": response.get("step_outputs", {}),  # Include intermediate step outputs
        }
# Example usage
if __name__ == "__main__":
    # Demo-only dependencies.
    from datasets import load_dataset

    from workflows.factory import create_quizbowl_bonus_workflow, create_quizbowl_tossup_workflow

    # Load the Quizbowl dataset
    ds_name = "umdclip/leaderboard_co_set"
    ds = load_dataset(ds_name, split="train")

    # Build one agent per question type, each backed by a multi-step workflow
    tossup_workflow = create_quizbowl_tossup_workflow()
    tossup_agent = MultiStepTossupAgent(workflow=tossup_workflow, buzz_threshold=0.9)
    bonus_workflow = create_quizbowl_bonus_workflow()
    bonus_agent = MultiStepBonusAgent(workflow=bonus_workflow)

    # --- Tossup demo: show the full question and gold label, then each guess ---
    print("\n=== TOSSUP MODE EXAMPLE ===")
    sample_question = ds[30]
    question_runs = sample_question["question_runs"]
    print(question_runs[-1])
    print(sample_question["gold_label"])
    print()

    for guess in tossup_agent.run(question_runs, early_stop=True):
        print(guess["full_response"])
        print(f"Guess at position {guess['position']}: {guess['answer']}")
        print(f"Confidence: {guess['confidence']}")
        print("Step outputs:", guess["step_outputs"])
        if guess["buzz"]:
            print("Buzzed!\n")

    # --- Bonus demo: answer every part of one bonus question ---
    print("\n=== BONUS MODE EXAMPLE ===")
    sample_bonus = ds[31]  # Assuming this is a bonus question
    leadin = sample_bonus["leadin"]
    parts = sample_bonus["parts"]

    print(f"Leadin: {leadin}")
    for part_number, part in enumerate(parts, start=1):
        part_text = part["part"]
        print(f"\nPart {part_number}: {part_text}")
        outcome = bonus_agent.run(leadin, part_text)
        print(f"Answer: {outcome['answer']}")
        print(f"Confidence: {outcome['confidence']}")
        print(f"Explanation: {outcome['explanation']}")
        print(f"Response time: {outcome['response_time']:.2f}s")
        print("Step outputs:", outcome["step_outputs"])