import json
import logging
import re
from collections import Counter
from typing import Optional, Union

import matplotlib.pyplot as plt
import pandas as pd


def _make_answer_html(answer: str, clean_answers: Optional[list[str]] = None) -> str:
    """Render the answer line, followed by any short alternate answers.

    Args:
        answer: Primary answer text.
        clean_answers: Alternate answers. Entries longer than six
            whitespace-separated words, or equal to ``answer``, are omitted.

    Returns:
        The formatted answer snippet.
    """
    alternates = [a for a in clean_answers or [] if len(a.split()) <= 6 and a != answer]
    additional_answers_html = f" [or {', '.join(alternates)}]" if alternates else ""
    return f"\nAnswer: {answer} {additional_answers_html}\n"


def _get_token_classes(confidence, buzz, score) -> str:
    """Return the CSS class string for a token.

    Tokens without a confidence value are plain; tokens with one are guess
    points, further marked as ``no-buzz`` or ``buzz-<score>``.
    """
    if confidence is None:
        return "token"
    if not buzz:
        return "token guess-point no-buzz"
    return f"token guess-point buzz-{score}"


def _create_token_tooltip_html(values) -> str:
    """Render the tooltip text (answer, confidence, status) for a token.

    Args:
        values: Mapping read for the keys ``confidence``, ``buzz``, ``score``
            and ``answer``; a falsy value yields an empty string.

    Returns:
        The tooltip snippet, or ``""`` when there is nothing to show.
    """
    if not values:
        return ""

    confidence = values.get("confidence", 0)
    buzz = values.get("buzz", 0)
    score = values.get("score", 0)
    answer = values.get("answer", "")

    # Keep the tooltip compact: show at most the first ten words.
    answer_tokens = answer.split()
    if len(answer_tokens) > 10:
        k = len(answer_tokens) - 10
        answer = " ".join(answer_tokens[:10]) + f"...[{k} more words]"

    # Light green for correct, light pink for incorrect.
    # NOTE(review): computed but not interpolated into the template below --
    # presumably meant for styling markup; confirm before removing.
    color = "#a3c9a3" if score else "#ebbec4"  # noqa: F841

    status = "✅ Correct" if score else "❌ Incorrect" if buzz else "🚫 No Buzz"
    return (
        "\n\n💡 Answer\n\n"
        f"{answer}\n\n"
        f"📊 Confidence: {confidence:.2f}\n\n"
        f"🔍 Status: {status}\n\n"
    )


def create_token_html(token: str, values: dict, i: int) -> str:
    """Render one token together with its tooltip.

    Args:
        token: Token text.
        values: Evaluation data for this token (may be empty).
        i: Token index supplied by the caller.

    Returns:
        The token text (decorated with an emoji when it is a guess point)
        followed by its tooltip snippet.
    """
    confidence = values.get("confidence", None)
    buzz = values.get("buzz", 0)
    score = values.get("score", 0)

    if not re.match(r"\w+", token):
        # Tokens that do not start with a word character are shown undecorated.
        # Plain spaces collapse when rendered as HTML, so emit a non-breaking
        # space entity instead of the previous no-op space-for-space replace.
        display_token = token.replace(" ", "&nbsp;")
    elif buzz:
        display_token = f"{token} 🚨"
    elif values:
        display_token = f"{token} 💭"
    else:
        display_token = token

    # NOTE(review): `css_class` and `i` are not used in the returned string --
    # presumably intended for wrapper markup around the token; confirm.
    css_class = _get_token_classes(confidence, buzz, score)  # noqa: F841

    tooltip_html = _create_token_tooltip_html(values)
    return f"{display_token}{tooltip_html}"


def create_tossup_html(
    tokens: list[str],
    answer_primary: str,
    clean_answers: list[str],
    marker_indices: Optional[list[int]] = None,
    eval_points: Optional[list[tuple[int, dict]]] = None,
) -> str:
    """Create HTML for tokens with hover capability and a header for the answer.

    Args:
        tokens: Question tokens in order.
        answer_primary: Primary answer text.
        clean_answers: Alternate answers passed on to the answer header.
        marker_indices: Accepted for backward compatibility; not used.
        eval_points: ``(token_index, values)`` pairs; the values of a pair are
            attached to the token at that index.

    Returns:
        The rendered snippet, or an error message snippet if rendering fails.
    """
    try:
        values_by_index = dict(eval_points or [])
        html_tokens = [
            create_token_html(token, values_by_index.get(i, {}), i + 1)
            for i, token in enumerate(tokens)
        ]
        tokens_html = "".join(html_tokens)
        answer_html = _make_answer_html(answer_primary, clean_answers)
        return f"\n{tokens_html}\n{answer_html}\n"
    except Exception as e:
        # Best-effort rendering: the UI gets an error snippet instead of a crash.
        logging.error(f"Error creating token HTML: {e}", exc_info=True)
        return f"\nError creating tokens: {str(e)}\n"


def create_bonus_html(leadin: str, parts: list[dict]) -> str:
    """Create HTML for a bonus leadin and its parts with answers.

    Args:
        leadin: Leadin text of the bonus.
        parts: One dict per part, read for the keys ``part``,
            ``answer_primary`` and ``clean_answers``.

    Returns:
        The rendered snippet: the leadin followed by each numbered part.
    """
    leadin_html = f"\n{leadin}\n"

    parts_html = []
    for number, part in enumerate(parts, start=1):
        question_text = part["part"]
        answer_html = _make_answer_html(part["answer_primary"], part["clean_answers"])
        parts_html.append(f"\n#{number}. {question_text}\n{answer_html}\n")

    joined_parts = "".join(parts_html)
    return f"\n{leadin_html} {joined_parts}\n"


def create_line_plot(eval_points: list[tuple[int, dict]], highlighted_index: int = -1) -> pd.DataFrame:
    """Build the DataFrame backing a line plot of buzz points.

    Args:
        eval_points: ``(position, payload)`` pairs.
        highlighted_index: Position at which to add a two-row vertical
            "hover-line" (values 0 and 1); negative disables it.

    Returns:
        A DataFrame with columns ``position``, ``value``, ``type``,
        ``highlight`` and ``color``; empty (with those columns) on error.
    """
    try:
        # NOTE(review): each payload is unpacked as a 2-item (value, buzz)
        # sequence, although the annotation says dict -- confirm the shape
        # callers actually pass.
        data = [
            {
                "position": i,
                "value": v,
                "type": "buzz",
                "highlight": True,
                "color": "#ff4444" if b == 0 else "#228b22",
            }
            for i, (v, b) in eval_points
        ]

        if highlighted_index >= 0:
            # Two points at y=0 and y=1 draw a vertical line at the token.
            data.extend(
                {
                    "position": highlighted_index,
                    "value": y_value,
                    "type": "hover-line",
                    "color": "#000000",
                    "highlight": True,
                }
                for y_value in (0, 1)
            )

        return pd.DataFrame(data)
    except Exception as e:
        logging.error(f"Error creating line plot: {e}", exc_info=True)
        # Return an empty DataFrame with the expected columns.
        return pd.DataFrame(columns=["position", "value", "type", "highlight", "color"])


def create_tossup_confidence_pyplot(
    tokens: list[str], eval_points: list[tuple[int, dict]], highlighted_index: int = -1
) -> plt.Figure:
    """Create a pyplot of buzz confidence per token with optional highlighting.

    Args:
        tokens: Question tokens; used to label buzz points.
        eval_points: ``(token_index, values)`` pairs; ``values`` is read for
            ``confidence``, ``buzz`` and ``score``.
        highlighted_index: Token index to mark with a vertical line; a
            negative value or ``None`` disables it.

    Returns:
        The matplotlib figure.
    """
    plt.style.use("ggplot")
    fig = plt.figure(figsize=(11, 5))
    ax = fig.add_subplot(111)

    # The curve starts at the origin; token positions are plotted 1-based.
    x = [0]
    y = [0]
    for i, v in eval_points:
        x.append(i + 1)
        y.append(v["confidence"])
    ax.plot(x, y, "o--", color="#4698cf")

    # Overlay buzz points: green when scored correct, red otherwise.
    for i, v in eval_points:
        if not v["buzz"]:
            continue
        confidence = v["confidence"]
        color = "green" if v["score"] else "red"
        ax.plot(i + 1, confidence, "o", color=color)
        if i >= len(tokens):
            # No token to label this point with; skip the annotation rather
            # than raising IndexError.
            logging.warning(f"Token index {i} is out of bounds for n_tokens: {len(tokens)}")
            continue
        ax.annotate(
            f"{tokens[i]}",
            (i + 1, confidence),
            textcoords="offset points",
            xytext=(0, 10),
            ha="center",
        )

    if highlighted_index is not None and highlighted_index >= 0:
        # Light vertical line for the highlighted token, spanning the full axis.
        ax.axvline(x=highlighted_index + 1, color="#ff9900", linestyle="--", ymin=0, ymax=1)

    ax.set_title("Buzz Confidence")
    ax.set_xlabel("Token Index")
    ax.set_ylabel("Confidence")
    ax.set_xticks(x)
    ax.set_xticklabels(x)
    return fig


def create_scatter_pyplot(token_positions: list[int], scores: list[int]) -> plt.Figure:
    """Create a scatter plot of token positions and scores.

    Each distinct ``(position, score)`` pair is drawn once, with a marker
    area proportional to how often the pair occurs.
    """
    plt.style.use("ggplot")
    fig = plt.figure(figsize=(11, 5))
    ax = fig.add_subplot(111)

    counts = Counter(zip(token_positions, scores))
    xs = [pos for pos, _ in counts]
    ys = [score for _, score in counts]
    sizes = [count * 20 for count in counts.values()]
    ax.scatter(xs, ys, color="#4698cf", s=sizes)
    return fig


def create_bonus_confidence_plot(parts: list[dict], model_outputs: list[dict]) -> plt.Figure:
    """Create a confidence bar plot for bonus parts.

    Args:
        parts: Bonus parts; only their count is used (one bar per part).
        model_outputs: One dict per part, read for ``confidence`` and ``score``.

    Returns:
        The matplotlib figure; bars are green where ``score == 1``, else red.
    """
    plt.style.use("ggplot")
    fig = plt.figure(figsize=(10, 6))
    ax = fig.add_subplot(111)

    x = range(1, len(parts) + 1)
    confidences = [output["confidence"] for output in model_outputs]
    scores = [output["score"] for output in model_outputs]

    bars = ax.bar(x, confidences, color="#4698cf")
    # Recolor each bar by correctness.
    for bar, score in zip(bars, scores):
        bar.set_color("green" if score == 1 else "red")

    ax.set_title("Part Confidence")
    ax.set_xlabel("Part Number")
    ax.set_ylabel("Confidence")
    ax.set_xticks(x)
    ax.set_xticklabels([f"Part {i}" for i in x])
    return fig


def update_tossup_plot(highlighted_index: int, state: str) -> Union[plt.Figure, pd.DataFrame]:
    """Update the plot when a token is hovered; add a vertical line on the plot.

    Args:
        highlighted_index: Index of the hovered token. ``None`` or an empty
            string means no token is highlighted.
        state: JSON string read for the keys ``tokens`` and ``values``.

    Returns:
        The confidence figure, or an empty DataFrame when the state is empty,
        incomplete, or an error occurs.
    """
    try:
        if not state or state == "{}":
            logging.warning("Empty state provided to update_plot")
            return pd.DataFrame()

        # Index 0 is a valid token, so only None / "" map to "no highlight".
        if highlighted_index is None or highlighted_index == "":
            highlighted_index = -1
        else:
            highlighted_index = int(highlighted_index)
        logging.info(f"Update plot triggered with token index: {highlighted_index}")

        data = json.loads(state)
        tokens = data.get("tokens", [])
        values = data.get("values", [])

        if not tokens or not values:
            logging.warning("No tokens or values found in state")
            return pd.DataFrame()

        return create_tossup_confidence_pyplot(tokens, values, highlighted_index)
    except Exception as e:
        logging.error(f"Error updating plot: {e}", exc_info=True)
        return pd.DataFrame()