Maharshi Gor committed on
Commit
1eeda1d
·
1 Parent(s): f9589f4

Workflow update.

Browse files

Refactored the per-run logic into `_single_run`; when the agent buzzes early, the last question run is now evaluated too.

src/workflows/factory.py CHANGED
@@ -15,8 +15,20 @@ Given a question clue, output your most likely guess in a couple words with a ca
15
  """
16
 
17
 
18
- def create_simple_workflow():
19
- pass
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
 
22
  def create_first_step_input_fields() -> list[InputField]:
 
15
  """
16
 
17
 
18
+ def create_empty_bonus_workflow():
19
+ return Workflow(
20
+ inputs=["leadin", "part"],
21
+ outputs={"answer": None, "confidence": None, "explanation": None},
22
+ steps={},
23
+ )
24
+
25
+
26
+ def create_empty_tossup_workflow():
27
+ return TossupWorkflow(
28
+ inputs=["question_text"],
29
+ outputs={"answer": None, "confidence": None},
30
+ steps={},
31
+ )
32
 
33
 
34
  def create_first_step_input_fields() -> list[InputField]:
src/workflows/qb_agents.py CHANGED
@@ -20,6 +20,7 @@ def _get_workflow_response(
20
  class TossupResult(TypedDict):
21
  answer: str
22
  confidence: float
 
23
  buzz: bool
24
  question_fragment: str
25
  position: int
@@ -62,6 +63,27 @@ class QuizBowlTossupAgent:
62
  if out_var not in workflow.outputs:
63
  raise ValueError(f"Output variable {out_var} not found in workflow outputs")
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  def run(self, question_runs: list[str], early_stop: bool = True) -> Iterable[TossupResult]:
66
  """Process a tossup question and decide when to buzz based on confidence.
67
 
@@ -82,28 +104,14 @@ class QuizBowlTossupAgent:
82
  """
83
  for i, question_text in enumerate(question_runs):
84
  # Execute the complete workflow
85
- answer_var_step = self.workflow.outputs["answer"].split(".")[0]
86
- workflow_output, response_time = _get_workflow_response(
87
- self.workflow, {self.external_input_variable: question_text}, logprob_step=answer_var_step
88
- )
89
- final_outputs = workflow_output["final_outputs"]
90
- buzz = self.workflow.buzzer.run(final_outputs["confidence"], logprob=workflow_output["logprob"])
91
- result: TossupResult = {
92
- "position": i + 1,
93
- "answer": final_outputs["answer"],
94
- "confidence": final_outputs["confidence"],
95
- "logprob": workflow_output["logprob"],
96
- "buzz": buzz,
97
- "question_fragment": question_text,
98
- "step_contents": workflow_output["step_contents"],
99
- "step_outputs": workflow_output["intermediate_outputs"], # Include intermediate step outputs
100
- "response_time": response_time,
101
- }
102
 
103
  yield result
104
 
105
  # If we've reached the confidence threshold, buzz and stop
106
  if early_stop and result["buzz"]:
 
 
107
  return
108
 
109
 
 
20
  class TossupResult(TypedDict):
21
  answer: str
22
  confidence: float
23
+ logprob: float | None
24
  buzz: bool
25
  question_fragment: str
26
  position: int
 
63
  if out_var not in workflow.outputs:
64
  raise ValueError(f"Output variable {out_var} not found in workflow outputs")
65
 
66
+ def _single_run(self, question_run: str, position: int) -> TossupResult:
67
+ """Process a single question run."""
68
+ answer_var_step = self.workflow.outputs["answer"].split(".")[0]
69
+ workflow_output, response_time = _get_workflow_response(
70
+ self.workflow, {self.external_input_variable: question_run}, logprob_step=answer_var_step
71
+ )
72
+ final_outputs = workflow_output["final_outputs"]
73
+ buzz = self.workflow.buzzer.run(final_outputs["confidence"], logprob=workflow_output["logprob"])
74
+ result: TossupResult = {
75
+ "position": position,
76
+ "answer": final_outputs["answer"],
77
+ "confidence": final_outputs["confidence"],
78
+ "logprob": workflow_output["logprob"],
79
+ "buzz": buzz,
80
+ "question_fragment": question_run,
81
+ "step_contents": workflow_output["step_contents"],
82
+ "step_outputs": workflow_output["intermediate_outputs"], # Include intermediate step outputs
83
+ "response_time": response_time,
84
+ }
85
+ return result
86
+
87
  def run(self, question_runs: list[str], early_stop: bool = True) -> Iterable[TossupResult]:
88
  """Process a tossup question and decide when to buzz based on confidence.
89
 
 
104
  """
105
  for i, question_text in enumerate(question_runs):
106
  # Execute the complete workflow
107
+ result = self._single_run(question_text, i + 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
  yield result
110
 
111
  # If we've reached the confidence threshold, buzz and stop
112
  if early_stop and result["buzz"]:
113
+ if i + 1 < len(question_runs):
114
+ yield self._single_run(question_runs[-1], len(question_runs))
115
  return
116
 
117
 
src/workflows/structs.py CHANGED
@@ -307,7 +307,7 @@ class Buzzer(BaseModel):
307
  """Configuration for when to buzz in a tossup question."""
308
 
309
  method: BuzzerMethod = BuzzerMethod.AND # Logic to combine thresholds
310
- confidence_threshold: float = Field(default=0.8, ge=0.0, le=1.0) # Minimum confidence to trigger a buzz
311
  prob_threshold: float | None = None # Optional log probability threshold
312
 
313
  class Config:
@@ -345,7 +345,7 @@ class Buzzer(BaseModel):
345
  class TossupWorkflow(Workflow):
346
  """Workflow specialized for tossup questions with buzzing capability."""
347
 
348
- buzzer: Buzzer
349
 
350
  def get_answer_model(self, answer_var: str | None = None) -> str | None:
351
  answer_var = answer_var or self.outputs["answer"]
 
307
  """Configuration for when to buzz in a tossup question."""
308
 
309
  method: BuzzerMethod = BuzzerMethod.AND # Logic to combine thresholds
310
+ confidence_threshold: float = Field(default=0.5, ge=0.0, le=1.0) # Minimum confidence to trigger a buzz
311
  prob_threshold: float | None = None # Optional log probability threshold
312
 
313
  class Config:
 
345
  class TossupWorkflow(Workflow):
346
  """Workflow specialized for tossup questions with buzzing capability."""
347
 
348
+ buzzer: Buzzer = Field(default_factory=Buzzer)
349
 
350
  def get_answer_model(self, answer_var: str | None = None) -> str | None:
351
  answer_var = answer_var or self.outputs["answer"]