|
|
|
import json |
|
import os |
|
import time |
|
from typing import Dict, Iterable, List, Optional, Tuple, Union |
|
|
|
import litellm |
|
from datasets import load_dataset |
|
from litellm import completion |
|
|
|
# Silently drop request params that the selected model/provider does not
# support, instead of raising (litellm global setting).
litellm.drop_params = True
|
|
|
|
|
# SECURITY: a live OpenAI API key was previously hardcoded here and committed
# to source control. It has been removed — rotate/revoke that key immediately.
# The key must now be supplied via the OPENAI_API_KEY environment variable
# (litellm reads it from the environment automatically); fail fast with a
# clear message if it is missing so the error surfaces before any API call.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY environment variable is not set; "
        "export it before running this module."
    )
|
|
|
# Default system prompt for tossup mode: instructs the model to reply with a
# strict JSON object ("answer" + "confidence") so the response can be
# machine-parsed by _extract_confidence_and_answer.
DEFAULT_SYS_PROMPT = """
You are a Quizbowl expert. You will be given a question that's progressively revealed.
Your goal is to identify the answer as quickly as possible with high confidence.
Respond with a JSON object with two fields:
1. "answer": Your best guess for the answer
2. "confidence": Your confidence in your answer from 0.0 to 1.0

DO NOT include any explanation. ONLY return the JSON object.
"""
|
|
|
|
|
class QuizbowlAgent:
    """
    An agent for playing Quizbowl with two modes:

    1. Tossup mode: Fast and direct with confidence calibration for buzzing
    2. Bonus round mode: Provides guess, rationale, and confidence
    """

    def __init__(
        self,
        model: str = "gpt-4o-mini",
        buzz_threshold: float = 0.85,
        temperature: float = 0.2,
        system_prompt: Optional[str] = None,
    ):
        """
        Initialize the QuizbowlAgent.

        Args:
            model: The LLM model to use for answering
            buzz_threshold: Confidence threshold for buzzing in tossup mode (0-1)
            temperature: Temperature for model sampling
            system_prompt: System prompt used for tossup requests; when None
                (the default), DEFAULT_SYS_PROMPT is used. A None sentinel is
                backward compatible with the previous DEFAULT_SYS_PROMPT default.
        """
        self.model = model
        self.buzz_threshold = buzz_threshold
        self.temperature = temperature
        self.system_prompt = DEFAULT_SYS_PROMPT if system_prompt is None else system_prompt

    def _process_question_runs(self, question_runs: List[str]) -> List[str]:
        """Process question runs to extract increasing amounts of text.

        Currently a pass-through; kept as an override hook for subclasses.
        """
        return question_runs

    def _get_agent_response(self, prompt: str, system_prompt: str) -> Tuple[object, float]:
        """Send a single chat request to the LLM.

        Args:
            prompt: The user-turn content.
            system_prompt: The system-turn content.

        Returns:
            A (response, response_time) tuple: the raw litellm response object
            and the wall-clock seconds the call took. (The original annotation
            claimed ``Dict`` but the method has always returned this tuple.)
        """
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]

        start_time = time.time()
        response = completion(
            model=self.model,
            messages=messages,
            temperature=self.temperature,
            # Answers are short JSON objects; cap tokens to keep calls cheap.
            max_tokens=150,
        )
        response_time = time.time() - start_time

        return response, response_time

    def _extract_confidence_and_answer(self, content: str) -> Tuple[str, float]:
        """Extract the answer and confidence score from the model response.

        Tries strict JSON first. On failure, falls back to line-based parsing:
        the first line is taken as the answer and any "confidence:" line
        supplies the score (defaulting to 0.5).
        """
        try:
            data = json.loads(content)
            # A JSON array/scalar would make .get() raise AttributeError;
            # route it to the fallback parser instead.
            if not isinstance(data, dict):
                raise ValueError("expected a JSON object")
            answer = data.get("answer", "")
            confidence = float(data.get("confidence", 0.0))
            return answer, confidence
        except (json.JSONDecodeError, ValueError):
            lines = content.strip().split("\n")
            answer = lines[0] if lines else ""
            confidence = 0.5

            for line in lines:
                if "confidence:" in line.lower():
                    try:
                        confidence = float(line.lower().split("confidence:")[1].strip())
                    except (ValueError, IndexError):
                        # Keep the 0.5 default when the value is unparseable.
                        pass

            return answer, confidence

    def tossup_mode(self, question_runs: List[str]) -> Iterable[Dict]:
        """
        Process a tossup question and decide when to buzz based on confidence.

        Args:
            question_runs: Progressive reveals of the question text

        Yields:
            Dict with answer, confidence, and whether to buzz. The generator
            stops after the first fragment that clears the buzz threshold.
        """
        for i, question_text in enumerate(question_runs):
            prompt = f"Question: {question_text}\n\nProvide your answer and confidence level:"

            # Use the configured system prompt; the constructor argument was
            # previously ignored here in favor of DEFAULT_SYS_PROMPT.
            response, response_time = self._get_agent_response(prompt, self.system_prompt)
            content = response.choices[0].message.content

            answer, confidence = self._extract_confidence_and_answer(content)

            result = {
                "answer": answer,
                "confidence": confidence,
                "buzz": confidence >= self.buzz_threshold,
                "question_fragment": question_text,
                "position": i + 1,
                "full_response": content,
                "response_time": response_time,
            }

            yield result

            # Confident enough to buzz — stop consuming further reveals.
            if confidence >= self.buzz_threshold:
                return

    def tossup_mode_top5(self, question_runs: List[str]) -> Iterable[Dict]:
        """
        Process a tossup question and provide the top 5 guesses with confidence levels.

        Args:
            question_runs: Progressive reveals of the question text

        Yields:
            Dict with top 5 answers, their confidences, and whether to buzz.
            Stops after the first fragment where any guess clears the threshold.
        """
        for i, question_text in enumerate(question_runs):
            prompt = f"Question: {question_text}\n\nProvide your top 5 answers and confidence levels."

            response, response_time = self._get_agent_response(prompt, self.system_prompt)
            content = response.choices[0].message.content

            try:
                data = json.loads(content)
                # Guard against a non-object JSON payload (e.g. a bare list).
                guesses = data.get("guesses", []) if isinstance(data, dict) else []
            except (json.JSONDecodeError, ValueError):
                guesses = []

            result = {
                "guesses": guesses,
                # .get() guards against a guess missing its "confidence" key,
                # which previously raised KeyError on malformed model output.
                "buzz": any(
                    guess.get("confidence", 0.0) >= self.buzz_threshold for guess in guesses
                ),
                "question_fragment": question_text,
                "position": i + 1,
                "full_response": content,
                "response_time": response_time,
            }

            yield result

            if result["buzz"]:
                return

    def bonus_round_mode(self, question: str) -> Dict:
        """
        Process a bonus round question with detailed analysis.

        Args:
            question: The bonus question text

        Returns:
            Dict with answer, rationale, and confidence
        """
        system_prompt = """
You are a Quizbowl expert answering a bonus question. Provide:
1. Your direct answer
2. A very brief and crisp one line rationale for your answer (key clues that led to it)
3. Your confidence level (0.0-1.0)

Respond with a JSON object with these three fields:
{
    "answer": "Your answer here",
    "rationale": "Your reasoning here",
    "confidence": 0.XX
}
"""

        prompt = f"Bonus Question: {question}\n\nProvide your answer, rationale, and confidence:"

        # BUG FIX: _get_agent_response returns a (response, elapsed) tuple;
        # the original code assigned the tuple itself to `response`, so the
        # subsequent `.choices` access raised AttributeError on every call.
        response, _ = self._get_agent_response(prompt, system_prompt)
        content = response.choices[0].message.content

        try:
            result = json.loads(content)
            # Treat non-objects and partial objects alike: fall back to the
            # line-based parser below.
            if not isinstance(result, dict) or not all(
                k in result for k in ["answer", "rationale", "confidence"]
            ):
                raise ValueError("Missing fields in response")
        except (json.JSONDecodeError, ValueError):
            lines = content.strip().split("\n")
            result = {"answer": "", "rationale": "", "confidence": 0.5}

            for line in lines:
                if line.lower().startswith("answer:"):
                    result["answer"] = line[7:].strip()
                elif line.lower().startswith("rationale:"):
                    result["rationale"] = line[10:].strip()
                elif line.lower().startswith("confidence:"):
                    try:
                        result["confidence"] = float(line[11:].strip())
                    except ValueError:
                        # Keep the 0.5 default on an unparseable number.
                        pass

        return result
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo driver: pulls one question from the HF leaderboard dataset and runs
    # all three agent modes on it. Requires network access and a valid
    # OPENAI_API_KEY in the environment.

    ds_name = "umdclip/leaderboard_co_set"
    ds = load_dataset(ds_name, split="train")

    agent = QuizbowlAgent(model="gpt-4-turbo", buzz_threshold=0.85)

    print("\n=== TOSSUP MODE EXAMPLE ===")
    sample_question = ds[0]
    # Last run is the fully-revealed question; gold_label is the reference answer.
    print(sample_question["question_runs"][-1])
    print(sample_question["gold_label"])
    question_runs = sample_question["question_runs"]

    # Single-answer tossup: the generator stops after the first buzz.
    results = agent.tossup_mode(question_runs)
    for result in results:
        print(f"Guess at position {result['position']}: {result['answer']}")
        print(f"Confidence: {result['confidence']}")
        if result["buzz"]:
            print("Buzzed!\n")

    # Top-5 variant: each yielded result carries a list of guess dicts.
    results = agent.tossup_mode_top5(question_runs)
    for result in results:
        guesses = [f"{guess['answer']} ({guess['confidence']})" for guess in result["guesses"]]
        print(f"Guesses at position {result['position']}: {', '.join(guesses)}")
        if result["buzz"]:
            print("Buzzed!")

    print("\n=== BONUS ROUND MODE EXAMPLE ===")
    # Reuse the fully-revealed tossup text as a stand-in bonus question.
    bonus_question = sample_question["question_runs"][-1]

    bonus_result = agent.bonus_round_mode(bonus_question)
    print(f"Answer: {bonus_result['answer']}")
    print(f"Rationale: {bonus_result['rationale']}")
    print(f"Confidence: {bonus_result['confidence']}")
|
|
|
|
|
|