Spaces:

qanta-challenge
/

quizbowl-submission

Running

quizbowl-submission/src/components/quizbowl/plotting.py

Maharshi Gor

Adds quizbowl pipeline support for bonus and tossup questions

02b7dec 3 months ago

10.7 kB

	import json
	import logging
	import re
	from collections import Counter

	import matplotlib.pyplot as plt
	import pandas as pd


	def _make_answer_html(answer: str, clean_answers: list[str] = []) -> str:
	clean_answers = [a for a in clean_answers if len(a.split()) <= 6 and a != answer]
	additional_answers_html = ""
	if clean_answers:
	additional_answers_html = f"<span class='bonus-answer-text'> [or {', '.join(clean_answers)}]</span>"
	return f"""
	<div class='bonus-answer'>
	<span class='bonus-answer-label'>Answer: </span>
	<span class='bonus-answer-text'>{answer}</span>
	{additional_answers_html}
	</div>
	"""


	def _get_token_classes(confidence, buzz, score) -> str:
	if confidence is None:
	return "token"
	elif not buzz:
	return "token guess-point no-buzz"
	else:
	return f"token guess-point buzz-{score}"


	def _create_token_tooltip_html(values) -> str:
	if not values:
	return ""
	confidence = values.get("confidence", 0)
	buzz = values.get("buzz", 0)
	score = values.get("score", 0)
	answer = values.get("answer", "")
	answer_tokens = answer.split()
	if len(answer_tokens) > 10:
	k = len(answer_tokens) - 10
	answer = " ".join(answer_tokens[:10]) + f"...[{k} more words]"

	color = "#a3c9a3" if score else "#ebbec4" # Light green for correct, light pink for incorrect

	return f"""
	<div class="tooltip card" style="background-color: {color}; border-radius: 8px; padding: 12px; box-shadow: 2px 4px 8px rgba(0, 0, 0, 0.15);">
	<div class="tooltip-content" style="font-family: 'Arial', sans-serif; color: #000;">
	<h4 style="margin: 0 0 8px; color: #000;">💡 Answer</h4>
	<p style="font-weight: bold; margin: 0 0 8px; color: #000;">{answer}</p>
	<p style="margin: 0 0 4px; color: #000;">📊 <b style="color: #000;">Confidence:</b> {confidence:.2f}</p>
	<p style="margin: 0; color: #000;">🔍 <b style="color: #000;">Status:</b> {"✅ Correct" if score else "❌ Incorrect" if buzz else "🚫 No Buzz"}</p>
	</div>
	</div>
	"""


	def create_token_html(token: str, values: dict, i: int) -> str:
	    """Wrap one question token in a ``<span>`` with classes, marker and tooltip.

	    Args:
	        token: Token text to display.
	        values: Evaluation values for this token (read for ``confidence``,
	            ``buzz`` and ``score``); an empty dict means the token is not a
	            guess point.
	        i: Index used for the span's ``id`` (``token-<i>``) and ``data-index``.

	    Returns:
	        The HTML for the token span, including its tooltip when ``values`` is
	        non-empty.
	    """
	    confidence = values.get("confidence", None)
	    buzz = values.get("buzz", 0)
	    score = values.get("score", 0)

	    if re.match(r"\w+", token):
	        # Tokens starting with a word character get a marker: a siren when the
	        # model buzzed, a thought bubble for any other guess point.
	        display_token = f"{token} 🚨" if buzz else f"{token} 💭" if values else token
	    else:
	        # Tokens that do not start with a word character are shown without a
	        # marker; their spaces become non-breaking so the browser does not
	        # collapse them.
	        display_token = token.replace(" ", "&nbsp;")

	    css_class = _get_token_classes(confidence, buzz, score)
	    # The tooltip is empty unless we have values for this token.
	    tooltip_html = _create_token_tooltip_html(values)

	    return f'<span id="token-{i}" class="{css_class}" data-index="{i}">{display_token}{tooltip_html}</span>'


	def create_tossup_html(
	    tokens: list[str],
	    answer_primary: str,
	    clean_answers: list[str],
	    marker_indices: "list[int] | None" = None,
	    eval_points: "list[tuple[int, dict]] | None" = None,
	) -> str:
	    """Create HTML for tokens with hover capability and a colored header for the answer.

	    Args:
	        tokens: Question tokens, rendered in order.
	        answer_primary: Primary answer shown below the question.
	        clean_answers: Alternate answers passed on to the answer block.
	        marker_indices: Accepted for backward compatibility; currently unused.
	        eval_points: ``(token_index, values)`` pairs. The values of a pair are
	            attached to the token at that zero-based index.

	    Returns:
	        The tossup card HTML. If anything raises while building it, the error
	        is logged and a small error ``<div>`` is returned instead.
	    """
	    try:
	        ep = dict(eval_points or [])

	        # Token ids in the markup are one-based, hence ``i + 1``.
	        html_tokens = [create_token_html(token, ep.get(i, {}), i + 1) for i, token in enumerate(tokens)]
	        tokens_html = "".join(html_tokens)

	        answer_html = _make_answer_html(answer_primary, clean_answers)
	        return f"""
	<div class='bonus-container'>
	<div class='bonus-card'>
	<div class='tossup-question'>
	{tokens_html}
	</div>
	{answer_html}
	</div>
	</div>
	"""
	    except Exception as e:
	        logging.error(f"Error creating token HTML: {e}", exc_info=True)
	        return f"<div class='token-container'>Error creating tokens: {str(e)}</div>"


	def create_bonus_html(leadin: str, parts: list[dict]) -> str:
	    """Create the HTML card for a bonus question: lead-in followed by its parts.

	    Args:
	        leadin: Lead-in text, inserted as-is into the markup.
	        parts: One dict per bonus part; each is read for the keys ``part``
	            (question text), ``answer_primary`` and ``clean_answers``.

	    Returns:
	        The bonus card HTML with parts numbered from 1.

	    Raises:
	        KeyError: If a part is missing one of the keys listed above.
	    """
	    leadin_html = f"<div class='bonus-leadin'>{leadin}</div>"
	    parts_html = []

	    for i, part in enumerate(parts):
	        question_text = part["part"]
	        answer_html = _make_answer_html(part["answer_primary"], part["clean_answers"])
	        parts_html.append(
	            f"""
	<div class='bonus-part'>
	<div class='bonus-part-text'><b>#{i + 1}.</b> {question_text}</div>
	{answer_html}
	</div>
	"""
	        )

	    all_parts_html = "".join(parts_html)
	    return f"""
	<div class='bonus-container'>
	<div class='bonus-card'>
	{leadin_html}
	{all_parts_html}
	</div>
	</div>
	"""


	def create_line_plot(eval_points: list[tuple[int, dict]], highlighted_index: int = -1) -> pd.DataFrame:
	    """Build the DataFrame of buzz points, optionally with a highlight line.

	    Args:
	        eval_points: ``(position, point)`` pairs. ``point`` is either a dict
	            with ``confidence`` and ``buzz`` keys or a plain ``(value, buzz)``
	            pair.
	        highlighted_index: Position at which to add two ``hover-line`` rows
	            (values 0 and 1). Negative means no highlight.

	    Returns:
	        A DataFrame with columns ``position``, ``value``, ``type``,
	        ``highlight`` and ``color``. The same columns are present when there
	        are no rows, and when an error is logged and an empty frame returned.
	    """
	    columns = ["position", "value", "type", "highlight", "color"]
	    try:
	        data = []

	        # One row per evaluation point.
	        for position, point in eval_points:
	            if isinstance(point, dict):
	                value, buzz = point["confidence"], point["buzz"]
	            else:
	                value, buzz = point
	            data.append(
	                {
	                    "position": position,
	                    "value": value,
	                    "type": "buzz",
	                    "highlight": True,
	                    # Red when there was no buzz, green otherwise.
	                    "color": "#ff4444" if buzz == 0 else "#228b22",
	                }
	            )

	        if highlighted_index >= 0:
	            # Two rows at the same position, at values 0 and 1, for the highlighted token.
	            data.extend(
	                {
	                    "position": highlighted_index,
	                    "value": endpoint,
	                    "type": "hover-line",
	                    "color": "#000000",
	                    "highlight": True,
	                }
	                for endpoint in (0, 1)
	            )

	        return pd.DataFrame(data, columns=columns)
	    except Exception as e:
	        logging.error(f"Error creating line plot: {e}", exc_info=True)
	        # Return an empty DataFrame with the expected columns
	        return pd.DataFrame(columns=columns)


	def create_tossup_confidence_pyplot(
	    tokens: list[str], eval_points: list[tuple[int, dict]], highlighted_index: int = -1
	) -> plt.Figure:
	    """Create a pyplot of token values with optional highlighting.

	    Args:
	        tokens: Question tokens; used to label the points where the model buzzed.
	        eval_points: ``(token_index, values)`` pairs; ``values`` is read for
	            ``confidence``, ``buzz`` and ``score``.
	        highlighted_index: Zero-based token index to mark with a vertical
	            line. ``None`` or a negative value means no highlight.

	    Returns:
	        The matplotlib figure. Points are plotted at ``token_index + 1`` on
	        the x axis, after an initial point at the origin.
	    """
	    plt.style.use("ggplot")  # Set theme to grid paper
	    fig = plt.figure(figsize=(11, 5))  # Set figure size to 11x5
	    ax = fig.add_subplot(111)
	    x = [0]
	    y = [0]
	    for i, v in eval_points:
	        x.append(i + 1)
	        y.append(v["confidence"])

	    ax.plot(x, y, "o--", color="#4698cf")
	    for i, v in eval_points:
	        if not v["buzz"]:
	            continue
	        confidence = v["confidence"]
	        color = "green" if v["score"] else "red"
	        ax.plot(i + 1, confidence, "o", color=color)
	        if not 0 <= i < len(tokens):
	            # No token to label this buzz with: keep the point, skip the
	            # annotation instead of failing on tokens[i].
	            logging.warning(f"Token index {i} is out of bounds for n_tokens: {len(tokens)}")
	            continue
	        ax.annotate(f"{tokens[i]}", (i + 1, confidence), textcoords="offset points", xytext=(0, 10), ha="center")

	    if highlighted_index is not None and highlighted_index >= 0:
	        # Add light vertical line for the highlighted token from 0 to 1
	        ax.axvline(x=highlighted_index + 1, color="#ff9900", linestyle="--", ymin=0, ymax=1)

	    ax.set_title("Buzz Confidence")
	    ax.set_xlabel("Token Index")
	    ax.set_ylabel("Confidence")
	    ax.set_xticks(x)
	    ax.set_xticklabels(x)
	    return fig


	def create_scatter_pyplot(token_positions: list[int], scores: list[int]) -> plt.Figure:
	    """Create a scatter plot of token positions and scores.

	    Each distinct ``(position, score)`` pair becomes one marker whose ``s``
	    value is 20 times the number of times that pair occurs.
	    """
	    plt.style.use("ggplot")
	    fig = plt.figure(figsize=(11, 5))
	    ax = fig.add_subplot(111)

	    # Count how often each (position, score) pair occurs.
	    pair_counts = Counter(zip(token_positions, scores))
	    xs = [position for position, _ in pair_counts]
	    ys = [score for _, score in pair_counts]
	    sizes = [occurrences * 20 for occurrences in pair_counts.values()]

	    ax.scatter(xs, ys, color="#4698cf", s=sizes)

	    return fig


	def create_bonus_confidence_plot(parts: list[dict], model_outputs: list[dict]) -> plt.Figure:
	    """Create confidence plot for bonus parts.

	    Draws one bar per part (numbered from 1) whose height is the
	    ``confidence`` of the matching model output. A bar is green when that
	    output's ``score`` equals 1 and red otherwise.
	    """
	    plt.style.use("ggplot")
	    fig = plt.figure(figsize=(10, 6))
	    ax = fig.add_subplot(111)

	    part_numbers = range(1, len(parts) + 1)
	    bar_heights = [result["confidence"] for result in model_outputs]
	    part_scores = [result["score"] for result in model_outputs]

	    # Draw the bars first, then recolor each one by correctness.
	    bars = ax.bar(part_numbers, bar_heights, color="#4698cf")
	    for bar, part_score in zip(bars, part_scores):
	        bar.set_color("green" if part_score == 1 else "red")

	    ax.set_title("Part Confidence")
	    ax.set_xlabel("Part Number")
	    ax.set_ylabel("Confidence")
	    ax.set_xticks(part_numbers)
	    ax.set_xticklabels([f"Part {number}" for number in part_numbers])

	    return fig


	def update_tossup_plot(highlighted_index: int, state: str) -> "plt.Figure | pd.DataFrame":
	    """Update the plot when a token is hovered; add a vertical line on the plot.

	    Args:
	        highlighted_index: Index of the hovered token. ``None`` or ``""``
	            means no token is highlighted; ``0`` is a valid index.
	        state: JSON string holding ``tokens`` and ``values``.

	    Returns:
	        The confidence figure on success. An empty DataFrame when the state
	        is empty, has no tokens or values, or an error occurs (errors are
	        logged).
	    """
	    try:
	        if not state or state == "{}":
	            logging.warning("Empty state provided to update_plot")
	            return pd.DataFrame()

	        # Test explicitly for "missing" so index 0 is not mistaken for it;
	        # -1 is the plotting function's "no highlight" value.
	        if highlighted_index is None or highlighted_index == "":
	            highlighted_index = -1
	        else:
	            highlighted_index = int(highlighted_index)
	        logging.info(f"Update plot triggered with token index: {highlighted_index}")

	        data = json.loads(state)
	        tokens = data.get("tokens", [])
	        values = data.get("values", [])

	        if not tokens or not values:
	            logging.warning("No tokens or values found in state")
	            return pd.DataFrame()

	        # Create updated plot with highlighting of the token point
	        return create_tossup_confidence_pyplot(tokens, values, highlighted_index)
	    except Exception as e:
	        logging.error(f"Error updating plot: {e}", exc_info=True)
	        return pd.DataFrame()