Maharshi Gor committed
Commit cd9f5b3 · Parent: f10a835

minor interface refactor

src/components/quizbowl/bonus.py CHANGED
@@ -2,6 +2,7 @@ import json
 from typing import Any
 
 import gradio as gr
+import numpy as np
 import pandas as pd
 from datasets import Dataset
 from loguru import logger
@@ -36,7 +37,7 @@ def process_bonus_results(results: list[dict]) -> pd.DataFrame:
     )
 
 
-def initialize_eval_interface(example: dict, model_outputs: list[dict]):
+def initialize_eval_interface(example: dict, model_outputs: list[dict], input_vars: list[str]):
     """Initialize the interface with example text."""
     try:
         html_content = create_bonus_html(example["leadin"], example["parts"])
@@ -45,12 +46,20 @@ def initialize_eval_interface(example: dict, model_outputs: list[dict]):
         plot_data = create_bonus_confidence_plot(example["parts"], model_outputs)
 
         # Store state
-        state = json.dumps({"parts": example["parts"], "outputs": model_outputs})
+        state = {"parts": example["parts"], "outputs": model_outputs}
 
-        return html_content, plot_data, state
+        # Preparing step outputs for the model
+        step_outputs = {}
+        for i, output in enumerate(model_outputs):
+            key = f"part {i + 1}"
+            step_outputs[key] = {k: v for k, v in output["step_outputs"].items() if k not in input_vars}
+            if output["logprob"] is not None:
+                step_outputs[key]["output_probability"] = float(np.exp(output["logprob"]))
+
+        return html_content, plot_data, state, step_outputs
     except Exception as e:
         logger.exception(f"Error initializing interface: {e.args}")
-        return f"<div>Error initializing interface: {str(e)}</div>", pd.DataFrame(), "{}"
+        return f"<div>Error initializing interface: {str(e)}</div>", pd.DataFrame(), {}, {}
 
 
 class BonusInterface:
@@ -64,7 +73,7 @@ class BonusInterface:
         self.model_options = model_options
         self.app = app
         self.defaults = defaults
-        self.output_state = gr.State(value="{}")
+        self.output_state = gr.State(value={})
         self.render()
 
     # ------------------------------------- LOAD PIPELINE STATE FROM BROWSER STATE -------------------------------------
@@ -75,10 +84,10 @@ class BonusInterface:
             state_dict = browser_state["bonus"].get("pipeline_state", {})
             pipeline_state = PipelineState.model_validate(state_dict)
             pipeline_state_dict = pipeline_state.model_dump()
-            output_state = browser_state["bonus"].get("output_state", "{}")
+            output_state = browser_state["bonus"].get("output_state", {})
         except Exception as e:
             logger.warning(f"Error loading presaved pipeline state: {e}")
-            output_state = "{}"
+            output_state = {}
             workflow = self.defaults["init_workflow"]
             pipeline_state_dict = PipelineState.from_workflow(workflow).model_dump()
         return browser_state, not pipeline_change, pipeline_state_dict, output_state
@@ -228,9 +237,10 @@ class BonusInterface:
         outputs = self.get_agent_outputs(example, pipeline_state)
 
         # Process results and prepare visualization data
-        html_content, plot_data, output_state = initialize_eval_interface(example, outputs)
+        html_content, plot_data, output_state, step_outputs = initialize_eval_interface(
+            example, outputs, pipeline_state.workflow.inputs
+        )
         df = process_bonus_results(outputs)
-        step_outputs = [output["step_outputs"] for output in outputs]
 
         return (
             html_content,
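
The bonus-side change moves step-output aggregation out of the caller (which previously kept a bare list of step_outputs) into initialize_eval_interface, and swaps JSON-string state for plain dicts. A minimal standalone sketch of the new aggregation, not part of the commit: the mock outputs and the "guess"/"confidence" field names are hypothetical; only the "part N" keying, the input_vars filtering, and the np.exp conversion mirror the diff.

    import numpy as np

    input_vars = ["leadin", "part"]  # hypothetical workflow input names, filtered out of the view
    model_outputs = [  # hypothetical per-part model outputs
        {"step_outputs": {"part": "...", "guess": "Paris", "confidence": 0.9}, "logprob": -0.11},
        {"step_outputs": {"part": "...", "guess": "Lyon", "confidence": 0.4}, "logprob": None},
    ]

    step_outputs = {}
    for i, output in enumerate(model_outputs):
        key = f"part {i + 1}"  # one entry per bonus part, 1-based
        # Keep only model-produced fields; drop the workflow's own inputs.
        step_outputs[key] = {k: v for k, v in output["step_outputs"].items() if k not in input_vars}
        if output["logprob"] is not None:
            # Convert the answer log-probability to a plain probability for display.
            step_outputs[key]["output_probability"] = float(np.exp(output["logprob"]))

    print(step_outputs)
    # {'part 1': {'guess': 'Paris', 'confidence': 0.9, 'output_probability': 0.8958...},
    #  'part 2': {'guess': 'Lyon', 'confidence': 0.4}}

Returning dicts instead of json.dumps(...) strings also lets gr.State and the browser state hold structured data directly, which is why the defaults change from "{}" to {} throughout.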
src/components/quizbowl/tossup.py CHANGED
@@ -37,12 +37,12 @@ class ScoredTossupResult(TossupResult):
     token_position: int  # Position in the question where prediction was made
 
 
-def add_model_scores(model_outputs: list[dict], clean_answers: list[str], run_indices: list[int]) -> list[dict]:
+def add_model_scores(run_outputs: list[dict], clean_answers: list[str], run_indices: list[int]) -> list[dict]:
     """Add model scores to the model outputs."""
-    for output, run_idx in zip(model_outputs, run_indices):
+    for output in run_outputs:
         output["score"] = evaluate_prediction(output["answer"], clean_answers)
-        output["token_position"] = run_idx + 1
-    return model_outputs
+        output["token_position"] = run_indices[output["position"] - 1]
+    return run_outputs
 
 
 def prepare_buzz_evals(
@@ -61,7 +61,11 @@ def prepare_buzz_evals(
 
 
 def initialize_eval_interface(
-    example: dict, model_outputs: list[dict], confidence_threshold: float, prob_threshold: float | None = None
+    example: dict,
+    run_outputs: list[dict],
+    input_vars: list,
+    confidence_threshold: float,
+    prob_threshold: float | None = None,
 ):
     """Initialize the interface with example text."""
     try:
@@ -69,7 +73,7 @@ def initialize_eval_interface(
         run_indices = example["run_indices"]
         answer = example["answer_primary"]
         clean_answers = example["clean_answers"]
-        eval_points = prepare_buzz_evals(run_indices, model_outputs)
+        eval_points = [(o["token_position"], o) for o in run_outputs]
 
         if not tokens:
             return "<div>No tokens found in the provided text.</div>", pd.DataFrame(), "{}"
@@ -77,12 +81,21 @@
         plot_data = create_tossup_confidence_pyplot(tokens, eval_points, confidence_threshold, prob_threshold)
 
         # Store tokens, values, and buzzes as JSON for later use
-        state = json.dumps({"tokens": tokens, "values": eval_points})
-
-        return html_content, plot_data, state
+        state = {"tokens": tokens, "values": eval_points}
+
+        # Preparing step outputs for the model
+        step_outputs = {}
+        for output in run_outputs:
+            tok_pos = output["token_position"]
+            key = "{pos}:{token}".format(pos=tok_pos + 1, token=tokens[tok_pos])
+            step_outputs[key] = {k: v for k, v in output["step_outputs"].items() if k not in input_vars}
+            if output["logprob"] is not None:
+                step_outputs[key]["output_probability"] = float(np.exp(output["logprob"]))
+
+        return html_content, plot_data, state, step_outputs
     except Exception as e:
         logger.exception(f"Error initializing interface: {e.args}")
-        return f"<div>Error initializing interface: {str(e)}</div>", pd.DataFrame(), "{}"
+        return f"<div>Error initializing interface: {str(e)}</div>", pd.DataFrame(), "{}", {}
 
 
 def process_tossup_results(results: list[dict]) -> pd.DataFrame:
@@ -119,7 +132,7 @@ class TossupInterface:
         self.model_options = model_options
         self.app = app
         self.defaults = defaults
-        self.output_state = gr.State(value="{}")
+        self.output_state = gr.State(value={})
         self.render()
 
     # ------------------------------------- LOAD PIPELINE STATE FROM BROWSER STATE -------------------------------------
@@ -130,10 +143,10 @@ class TossupInterface:
             state_dict = browser_state["tossup"].get("pipeline_state", {})
             pipeline_state = TossupPipelineState.model_validate(state_dict)
             pipeline_state_dict = pipeline_state.model_dump()
-            output_state = browser_state["tossup"].get("output_state", "{}")
+            output_state = browser_state["tossup"].get("output_state", {})
         except Exception as e:
             logger.warning(f"Error loading presaved pipeline state: {e}")
-            output_state = "{}"
+            output_state = {}
             workflow = self.defaults["init_workflow"]
             pipeline_state_dict = TossupPipelineState.from_workflow(workflow).model_dump()
         return browser_state, not pipeline_change, pipeline_state_dict, output_state
@@ -282,20 +295,10 @@ class TossupInterface:
         # Process results and prepare visualization data
         confidence_threshold = workflow.buzzer.confidence_threshold
         prob_threshold = workflow.buzzer.prob_threshold
-        tokens_html, plot_data, output_state = initialize_eval_interface(
-            example, outputs, confidence_threshold, prob_threshold
+        tokens_html, plot_data, output_state, step_outputs = initialize_eval_interface(
+            example, outputs, workflow.inputs, confidence_threshold, prob_threshold
         )
         df = process_tossup_results(outputs)
-        tokens = example["question"].split()
-        step_outputs = {}
-        for output in outputs:
-            pos = output["token_position"]
-            token = tokens[pos - 1]
-            key = f"{pos}:{token}"
-            step_outputs[key] = {k: v for k, v in output["step_outputs"].items() if k not in workflow.inputs}
-            if output["logprob"] is not None:
-                step_outputs[key]["logprob"] = output["logprob"]
-                step_outputs[key]["prob"] = float(np.exp(output["logprob"]))
 
         return (
             tokens_html,
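
On the tossup side, add_model_scores now resolves each run's token position through run_indices from what appears to be a 1-based run "position" field (inferred from run_indices[output["position"] - 1]), instead of zipping outputs against indices, and initialize_eval_interface builds the step-output view keyed by "<position>:<token>". A minimal sketch under those assumptions, not part of the commit; the question text, indices, and output fields are hypothetical.

    import numpy as np

    tokens = "For 10 points name this French capital".split()
    run_indices = [3, 6]  # hypothetical 0-based token indices where the model was run
    run_outputs = [       # hypothetical run outputs carrying a 1-based run `position`
        {"position": 1, "logprob": -1.5, "step_outputs": {"guess": "London"}},
        {"position": 2, "logprob": -0.05, "step_outputs": {"guess": "Paris"}},
    ]

    step_outputs = {}
    for output in run_outputs:
        # New mapping: resolve the run's 1-based position through run_indices
        # (the old code paired outputs and indices with zip and used run_idx + 1).
        tok_pos = run_indices[output["position"] - 1]
        output["token_position"] = tok_pos
        key = f"{tok_pos + 1}:{tokens[tok_pos]}"  # "<1-based token position>:<token>"
        step_outputs[key] = dict(output["step_outputs"])
        if output["logprob"] is not None:
            # A single probability field replaces the old logprob/prob pair.
            step_outputs[key]["output_probability"] = float(np.exp(output["logprob"]))

    print(step_outputs)
    # {'4:name': {'guess': 'London', 'output_probability': 0.2231...},
    #  '7:capital': {'guess': 'Paris', 'output_probability': 0.9512...}}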