Maharshi Gor
committed on
Commit
·
1eeda1d
1
Parent(s):
f9589f4
Workflow update.
Browse files
Refactored single_run; eval on last question run too.
- src/workflows/factory.py +14 -2
- src/workflows/qb_agents.py +25 -17
- src/workflows/structs.py +2 -2
src/workflows/factory.py
CHANGED
@@ -15,8 +15,20 @@ Given a question clue, output your most likely guess in a couple words with a ca
|
|
15 |
"""
|
16 |
|
17 |
|
18 |
-
def
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
21 |
|
22 |
def create_first_step_input_fields() -> list[InputField]:
|
|
|
15 |
"""
|
16 |
|
17 |
|
18 |
+
def create_empty_bonus_workflow():
|
19 |
+
return Workflow(
|
20 |
+
inputs=["leadin", "part"],
|
21 |
+
outputs={"answer": None, "confidence": None, "explanation": None},
|
22 |
+
steps={},
|
23 |
+
)
|
24 |
+
|
25 |
+
|
26 |
+
def create_empty_tossup_workflow():
|
27 |
+
return TossupWorkflow(
|
28 |
+
inputs=["question_text"],
|
29 |
+
outputs={"answer": None, "confidence": None},
|
30 |
+
steps={},
|
31 |
+
)
|
32 |
|
33 |
|
34 |
def create_first_step_input_fields() -> list[InputField]:
|
src/workflows/qb_agents.py
CHANGED
@@ -20,6 +20,7 @@ def _get_workflow_response(
|
|
20 |
class TossupResult(TypedDict):
|
21 |
answer: str
|
22 |
confidence: float
|
|
|
23 |
buzz: bool
|
24 |
question_fragment: str
|
25 |
position: int
|
@@ -62,6 +63,27 @@ class QuizBowlTossupAgent:
|
|
62 |
if out_var not in workflow.outputs:
|
63 |
raise ValueError(f"Output variable {out_var} not found in workflow outputs")
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
def run(self, question_runs: list[str], early_stop: bool = True) -> Iterable[TossupResult]:
|
66 |
"""Process a tossup question and decide when to buzz based on confidence.
|
67 |
|
@@ -82,28 +104,14 @@ class QuizBowlTossupAgent:
|
|
82 |
"""
|
83 |
for i, question_text in enumerate(question_runs):
|
84 |
# Execute the complete workflow
|
85 |
-
|
86 |
-
workflow_output, response_time = _get_workflow_response(
|
87 |
-
self.workflow, {self.external_input_variable: question_text}, logprob_step=answer_var_step
|
88 |
-
)
|
89 |
-
final_outputs = workflow_output["final_outputs"]
|
90 |
-
buzz = self.workflow.buzzer.run(final_outputs["confidence"], logprob=workflow_output["logprob"])
|
91 |
-
result: TossupResult = {
|
92 |
-
"position": i + 1,
|
93 |
-
"answer": final_outputs["answer"],
|
94 |
-
"confidence": final_outputs["confidence"],
|
95 |
-
"logprob": workflow_output["logprob"],
|
96 |
-
"buzz": buzz,
|
97 |
-
"question_fragment": question_text,
|
98 |
-
"step_contents": workflow_output["step_contents"],
|
99 |
-
"step_outputs": workflow_output["intermediate_outputs"], # Include intermediate step outputs
|
100 |
-
"response_time": response_time,
|
101 |
-
}
|
102 |
|
103 |
yield result
|
104 |
|
105 |
# If we've reached the confidence threshold, buzz and stop
|
106 |
if early_stop and result["buzz"]:
|
|
|
|
|
107 |
return
|
108 |
|
109 |
|
|
|
20 |
class TossupResult(TypedDict):
|
21 |
answer: str
|
22 |
confidence: float
|
23 |
+
logprob: float | None
|
24 |
buzz: bool
|
25 |
question_fragment: str
|
26 |
position: int
|
|
|
63 |
if out_var not in workflow.outputs:
|
64 |
raise ValueError(f"Output variable {out_var} not found in workflow outputs")
|
65 |
|
66 |
+
def _single_run(self, question_run: str, position: int) -> TossupResult:
|
67 |
+
"""Process a single question run."""
|
68 |
+
answer_var_step = self.workflow.outputs["answer"].split(".")[0]
|
69 |
+
workflow_output, response_time = _get_workflow_response(
|
70 |
+
self.workflow, {self.external_input_variable: question_run}, logprob_step=answer_var_step
|
71 |
+
)
|
72 |
+
final_outputs = workflow_output["final_outputs"]
|
73 |
+
buzz = self.workflow.buzzer.run(final_outputs["confidence"], logprob=workflow_output["logprob"])
|
74 |
+
result: TossupResult = {
|
75 |
+
"position": position,
|
76 |
+
"answer": final_outputs["answer"],
|
77 |
+
"confidence": final_outputs["confidence"],
|
78 |
+
"logprob": workflow_output["logprob"],
|
79 |
+
"buzz": buzz,
|
80 |
+
"question_fragment": question_run,
|
81 |
+
"step_contents": workflow_output["step_contents"],
|
82 |
+
"step_outputs": workflow_output["intermediate_outputs"], # Include intermediate step outputs
|
83 |
+
"response_time": response_time,
|
84 |
+
}
|
85 |
+
return result
|
86 |
+
|
87 |
def run(self, question_runs: list[str], early_stop: bool = True) -> Iterable[TossupResult]:
|
88 |
"""Process a tossup question and decide when to buzz based on confidence.
|
89 |
|
|
|
104 |
"""
|
105 |
for i, question_text in enumerate(question_runs):
|
106 |
# Execute the complete workflow
|
107 |
+
result = self._single_run(question_text, i + 1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
yield result
|
110 |
|
111 |
# If we've reached the confidence threshold, buzz and stop
|
112 |
if early_stop and result["buzz"]:
|
113 |
+
if i + 1 < len(question_runs):
|
114 |
+
yield self._single_run(question_runs[-1], len(question_runs))
|
115 |
return
|
116 |
|
117 |
|
src/workflows/structs.py
CHANGED
@@ -307,7 +307,7 @@ class Buzzer(BaseModel):
|
|
307 |
"""Configuration for when to buzz in a tossup question."""
|
308 |
|
309 |
method: BuzzerMethod = BuzzerMethod.AND # Logic to combine thresholds
|
310 |
-
confidence_threshold: float = Field(default=0.
|
311 |
prob_threshold: float | None = None # Optional log probability threshold
|
312 |
|
313 |
class Config:
|
@@ -345,7 +345,7 @@ class Buzzer(BaseModel):
|
|
345 |
class TossupWorkflow(Workflow):
|
346 |
"""Workflow specialized for tossup questions with buzzing capability."""
|
347 |
|
348 |
-
buzzer: Buzzer
|
349 |
|
350 |
def get_answer_model(self, answer_var: str | None = None) -> str | None:
|
351 |
answer_var = answer_var or self.outputs["answer"]
|
|
|
307 |
"""Configuration for when to buzz in a tossup question."""
|
308 |
|
309 |
method: BuzzerMethod = BuzzerMethod.AND # Logic to combine thresholds
|
310 |
+
confidence_threshold: float = Field(default=0.5, ge=0.0, le=1.0) # Minimum confidence to trigger a buzz
|
311 |
prob_threshold: float | None = None # Optional log probability threshold
|
312 |
|
313 |
class Config:
|
|
|
345 |
class TossupWorkflow(Workflow):
|
346 |
"""Workflow specialized for tossup questions with buzzing capability."""
|
347 |
|
348 |
+
buzzer: Buzzer = Field(default_factory=Buzzer)
|
349 |
|
350 |
def get_answer_model(self, answer_var: str | None = None) -> str | None:
|
351 |
answer_var = answer_var or self.outputs["answer"]
|