Spaces:

qanta-challenge
/

quizbowl-submission

Running

quizbowl-submission / src /components /quizbowl /utils.py

Maharshi Gor

Updates and Refactor in QB Interfaces:

3b39b49 about 1 month ago

2.66 kB

	from typing import Any, Dict, List

	import pandas as pd


	def evaluate_prediction(prediction: str, clean_answers: list[str] \| str) -> int:
	"""Evaluate the buzz of a prediction against the clean answers."""
	if isinstance(clean_answers, str):
	print("clean_answers is a string")
	clean_answers = [clean_answers]
	pred = prediction.lower().strip()
	if not pred:
	return 0
	for answer in clean_answers:
	answer = answer.strip().lower()
	if answer and answer in pred:
	print(f"Found {answer} in {pred}")
	return 1
	return 0


	def _create_confidence_plot_data(results: List[Dict], top_k_mode: bool = False) -> pd.DataFrame:
	"""Create a DataFrame for the confidence plot."""
	if not top_k_mode:
	return pd.DataFrame(
	{
	"position": [r["position"] for r in results],
	"confidence": [r["confidence"] for r in results],
	"answer": [r["answer"] for r in results],
	}
	)

	# For top-k mode, extract and plot top answers
	return _create_top_k_plot_data(results)


	def _create_top_k_plot_data(results: List[Dict]) -> pd.DataFrame:
	"""Create plot data for top-k mode."""
	# Find top answers across all positions (limited to top 5)
	top_answers = set()
	for r in results:
	for g in r.get("guesses", [])[:3]: # Get top 3 from each position
	if g.get("answer"):
	top_answers.add(g.get("answer"))

	top_answers = list(top_answers)[:5] # Limit to 5 total answers

	# Create plot data for each answer
	all_data = []
	for position_idx, result in enumerate(results):
	position = result["position"]
	for answer in top_answers:
	confidence = 0
	for guess in result.get("guesses", []):
	if guess.get("answer") == answer:
	confidence = guess.get("confidence", 0)
	break
	all_data.append({"position": position, "confidence": confidence, "answer": answer})

	return pd.DataFrame(all_data)


	def _create_top_k_dataframe(results: List[Dict]) -> pd.DataFrame:
	"""Create a DataFrame for top-k results."""
	df_rows = []
	for result in results:
	position = result["position"]
	for i, guess in enumerate(result.get("guesses", [])):
	df_rows.append(
	{
	"position": position,
	"answer": guess.get("answer", ""),
	"confidence": guess.get("confidence", 0),
	"rank": i + 1,
	}
	)
	return pd.DataFrame(df_rows)