from typing import Any, Dict, List import pandas as pd def evaluate_prediction(prediction: str, clean_answers: list[str] | str) -> int: """Evaluate the buzz of a prediction against the clean answers.""" if isinstance(clean_answers, str): print("clean_answers is a string") clean_answers = [clean_answers] pred = prediction.lower().strip() if not pred: return 0 for answer in clean_answers: answer = answer.strip().lower() if answer and answer in pred: print(f"Found {answer} in {pred}") return 1 return 0 def _create_confidence_plot_data(results: List[Dict], top_k_mode: bool = False) -> pd.DataFrame: """Create a DataFrame for the confidence plot.""" if not top_k_mode: return pd.DataFrame( { "position": [r["position"] for r in results], "confidence": [r["confidence"] for r in results], "answer": [r["answer"] for r in results], } ) # For top-k mode, extract and plot top answers return _create_top_k_plot_data(results) def _create_top_k_plot_data(results: List[Dict]) -> pd.DataFrame: """Create plot data for top-k mode.""" # Find top answers across all positions (limited to top 5) top_answers = set() for r in results: for g in r.get("guesses", [])[:3]: # Get top 3 from each position if g.get("answer"): top_answers.add(g.get("answer")) top_answers = list(top_answers)[:5] # Limit to 5 total answers # Create plot data for each answer all_data = [] for position_idx, result in enumerate(results): position = result["position"] for answer in top_answers: confidence = 0 for guess in result.get("guesses", []): if guess.get("answer") == answer: confidence = guess.get("confidence", 0) break all_data.append({"position": position, "confidence": confidence, "answer": answer}) return pd.DataFrame(all_data) def _create_top_k_dataframe(results: List[Dict]) -> pd.DataFrame: """Create a DataFrame for top-k results.""" df_rows = [] for result in results: position = result["position"] for i, guess in enumerate(result.get("guesses", [])): df_rows.append( { "position": position, "answer": guess.get("answer", ""), "confidence": guess.get("confidence", 0), "rank": i + 1, } ) return pd.DataFrame(df_rows)