Maharshi Gor committed on
Commit
38e3800
·
1 Parent(s): 2900a81

Refactored the single-step and multi-step QB agents into one module, qb_agents.

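A rough usage sketch of the change (class names are from the diff below; `pipeline_state`, `buzz_threshold`, and `question_runs` are assumed to exist at the call site). Instead of branching on the number of workflow steps to pick a simple or multi-step agent, callers now construct a single agent class per question type:

from workflows.qb_agents import QuizBowlTossupAgent, QuizBowlBonusAgent

# Before this commit, call sites branched on workflow size:
#   if len(workflow.steps) > 1: agent = MultiStepTossupAgent(workflow, buzz_threshold)
#   else:                       agent = SimpleTossupAgent(workflow, buzz_threshold)
# After the refactor, one class handles both single- and multi-step workflows:
tossup_agent = QuizBowlTossupAgent(pipeline_state.workflow, buzz_threshold)
bonus_agent = QuizBowlBonusAgent(pipeline_state.workflow)

# run() yields one result dict per question run, buzzing once confidence crosses the threshold.
outputs = list(tossup_agent.run(question_runs, early_stop=True))
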
src/components/quizbowl/bonus.py CHANGED
@@ -9,8 +9,7 @@ from loguru import logger
9
  from components.model_pipeline.model_pipeline import PipelineInterface, PipelineState, PipelineUIState
10
  from display.formatting import styled_error
11
  from submission import submit
12
- from workflows.qb.multi_step_agent import MultiStepBonusAgent
13
- from workflows.qb.simple_agent import SimpleBonusAgent
14
  from workflows.structs import ModelStep, Workflow
15
 
16
  from . import commons
@@ -211,11 +210,7 @@ class BonusInterface:
211
  """Get the model outputs for a given question ID."""
212
  outputs = []
213
  leadin = example["leadin"]
214
- workflow = pipeline_state.workflow
215
- if len(workflow.steps) > 1:
216
- agent = MultiStepBonusAgent(workflow)
217
- else:
218
- agent = SimpleBonusAgent(workflow)
219
 
220
  for i, part in enumerate(example["parts"]):
221
  # Run model for each part
 
9
  from components.model_pipeline.model_pipeline import PipelineInterface, PipelineState, PipelineUIState
10
  from display.formatting import styled_error
11
  from submission import submit
12
+ from workflows.qb_agents import QuizBowlBonusAgent
 
13
  from workflows.structs import ModelStep, Workflow
14
 
15
  from . import commons
 
210
  """Get the model outputs for a given question ID."""
211
  outputs = []
212
  leadin = example["leadin"]
213
+ agent = QuizBowlBonusAgent(pipeline_state.workflow)
214
 
215
  for i, part in enumerate(example["parts"]):
216
  # Run model for each part
src/components/quizbowl/tossup.py CHANGED
@@ -10,8 +10,7 @@ from loguru import logger
10
  from components.model_pipeline.model_pipeline import PipelineInterface, PipelineState, PipelineUIState
11
  from display.formatting import styled_error
12
  from submission import submit
13
- from workflows.qb.multi_step_agent import MultiStepTossupAgent
14
- from workflows.qb.simple_agent import SimpleTossupAgent
15
  from workflows.structs import ModelStep, Workflow
16
 
17
  from . import commons
@@ -275,12 +274,7 @@ class TossupInterface:
275
  tokens = example["question"].split()
276
  for run_idx in example["run_indices"]:
277
  question_runs.append(" ".join(tokens[: run_idx + 1]))
278
-
279
- workflow = pipeline_state.workflow
280
- if len(workflow.steps) > 1:
281
- agent = MultiStepTossupAgent(workflow, buzz_threshold)
282
- else:
283
- agent = SimpleTossupAgent(workflow, buzz_threshold)
284
  outputs = list(agent.run(question_runs, early_stop=early_stop))
285
  outputs = add_model_scores(outputs, example["clean_answers"], example["run_indices"])
286
  return outputs
 
10
  from components.model_pipeline.model_pipeline import PipelineInterface, PipelineState, PipelineUIState
11
  from display.formatting import styled_error
12
  from submission import submit
13
+ from workflows.qb_agents import QuizBowlTossupAgent
 
14
  from workflows.structs import ModelStep, Workflow
15
 
16
  from . import commons
 
274
  tokens = example["question"].split()
275
  for run_idx in example["run_indices"]:
276
  question_runs.append(" ".join(tokens[: run_idx + 1]))
277
+ agent = QuizBowlTossupAgent(pipeline_state.workflow, buzz_threshold)
278
  outputs = list(agent.run(question_runs, early_stop=early_stop))
279
  outputs = add_model_scores(outputs, example["clean_answers"], example["run_indices"])
280
  return outputs
src/workflows/executors.py CHANGED
@@ -1,5 +1,4 @@
1
  # %%
2
- import json
3
  from typing import Any
4
 
5
  import pydantic
@@ -178,42 +177,10 @@ def execute_model_step(
178
  return outputs
179
 
180
 
181
- # Example usage
182
- if __name__ == "__main__":
183
- # Define a simple model step
184
- model_step = ModelStep(
185
- id="step1",
186
- model="gpt-4o-mini",
187
- provider="OpenAI",
188
- call_type="llm",
189
- system_prompt="You are a simple NLP tool that takes a string, and a number N, and return the first N entities in the string, and the total count of entities in the string.",
190
- input_fields=[
191
- InputField(name="sentence", description="The sentence to process", variable="sentence", func=None),
192
- InputField(name="n", description="The number of entities to return", variable="n", func=None),
193
- ],
194
- output_fields=[
195
- OutputField(
196
- name="entities",
197
- description="The first N entities in the string as a list of strings",
198
- type="list[str]",
199
- func=None,
200
- ),
201
- OutputField(name="count", description="The total count of entities in the string", type="int", func=None),
202
- ],
203
- )
204
-
205
- # Define processed inputs
206
- processed_inputs = {"sentence": "Abdul Akbar is a good person, but Jesus is the son of God.", "n": 3}
207
-
208
- # Execute the model step
209
- outputs = execute_model_step(model_step, processed_inputs)
210
- print(outputs)
211
-
212
-
213
  # %%
214
- def execute_workflow(
215
  workflow: Workflow, input_values: dict[str, Any], return_full_content: bool = False
216
- ) -> dict[str, Any] | tuple[dict[str, Any], str]:
217
  """
218
  Execute the given workflow as a computational graph.
219
 
@@ -234,10 +201,14 @@ def execute_workflow(
234
  dependencies, and input/output specifications.
235
  input_values (dict[str, Any]): External input values to be used by the workflow.
236
  Keys should match the required workflow.inputs.
 
 
237
 
238
  Returns:
239
- dict[str, Any]: A dictionary of the workflow's outputs, with keys matching
240
- the variables defined in workflow.outputs.
 
 
241
 
242
  Raises:
243
  UnknownVariableError: If an input_field references a variable that is not
@@ -255,12 +226,12 @@ def execute_workflow(
255
  ... "analyze": ModelStep(...) # A step that analyzes the entities
256
  ... },
257
  ... inputs=["text"],
258
- ... outputs=["analyze.sentiment", "extract.entities"]
259
  ... )
260
- >>> result = execute_workflow(workflow, {"text": "Apple is launching a new product tomorrow."})
261
- >>> print(result["analyze.sentiment"])
262
  "positive"
263
- >>> print(result["extract.entities"])
264
  ["Apple", "product"]
265
  """
266
  # Step 1: Pre-populate computed values with external workflow inputs.
@@ -280,11 +251,15 @@ def execute_workflow(
280
  execution_order = topological_sort(dependencies)
281
 
282
  # Step 4: Execute steps in topological order.
 
283
  for step_id in execution_order:
284
  step = workflow.steps[step_id]
285
 
 
286
  # Execute the step
287
- outputs = execute_model_step(step, computed_values)
 
 
288
  outputs = {f"{step_id}.{k}": v for k, v in outputs.items()}
289
  computed_values.update(outputs)
290
 
@@ -295,7 +270,77 @@ def execute_workflow(
295
  raise WorkflowError(f"Workflow output variable {var} was not produced")
296
  final_outputs[target] = computed_values[var]
297
 
298
- return final_outputs
 
299
 
300
 
301
  def run_examples():
@@ -438,3 +483,34 @@ if __name__ == "__main__":
438
  run_examples()
439
 
440
  # %%
 
1
  # %%
 
2
  from typing import Any
3
 
4
  import pydantic
 
177
  return outputs
178
 
179
 
 
180
  # %%
181
+ def execute_multi_step_workflow(
182
  workflow: Workflow, input_values: dict[str, Any], return_full_content: bool = False
183
+ ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
184
  """
185
  Execute the given workflow as a computational graph.
186
 
 
201
  dependencies, and input/output specifications.
202
  input_values (dict[str, Any]): External input values to be used by the workflow.
203
  Keys should match the required workflow.inputs.
204
+ return_full_content (bool, optional): If True, returns the full content of each step.
205
+ Defaults to False.
206
 
207
  Returns:
208
+ A tuple containing:
209
+ - A dictionary of the workflow's outputs, with keys matching the variables defined in workflow.outputs.
210
+ - A dictionary of all computed values during workflow execution, including intermediate results.
211
+ - A dictionary of step contents, only populated if return_full_content is True.
212
 
213
  Raises:
214
  UnknownVariableError: If an input_field references a variable that is not
 
226
  ... "analyze": ModelStep(...) # A step that analyzes the entities
227
  ... },
228
  ... inputs=["text"],
229
+ ... outputs={"sentiment": "analyze.sentiment", "entities": "extract.entities"}
230
  ... )
231
+ >>> final_outputs, computed_values, step_contents = execute_workflow(workflow, {"text": "Apple is launching a new product tomorrow."})
232
+ >>> print(final_outputs["sentiment"])
233
  "positive"
234
+ >>> print(final_outputs["entities"])
235
  ["Apple", "product"]
236
  """
237
  # Step 1: Pre-populate computed values with external workflow inputs.
 
251
  execution_order = topological_sort(dependencies)
252
 
253
  # Step 4: Execute steps in topological order.
254
+ step_contents: dict[str, Any] = {}
255
  for step_id in execution_order:
256
  step = workflow.steps[step_id]
257
 
258
+ outputs = execute_model_step(step, computed_values, return_full_content=return_full_content)
259
  # Execute the step
260
+ if return_full_content:
261
+ outputs, content = outputs
262
+ step_contents[step_id] = content
263
  outputs = {f"{step_id}.{k}": v for k, v in outputs.items()}
264
  computed_values.update(outputs)
265
 
 
270
  raise WorkflowError(f"Workflow output variable {var} was not produced")
271
  final_outputs[target] = computed_values[var]
272
 
273
+ step_outputs = {k: v for k, v in computed_values.items() if k not in workflow.inputs}
274
+
275
+ return final_outputs, step_outputs, step_contents
276
+
277
+
278
+ def execute_simple_workflow(
279
+ workflow: Workflow, input_values: dict[str, Any], return_full_content: bool = False
280
+ ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
281
+ """Execute a simple workflow with a single step.
282
+
283
+ This is a simplified version of execute_workflow for workflows with only one step.
284
+
285
+ Args:
286
+ workflow: The workflow to execute
287
+ input_values: Dictionary of input values
288
+ return_full_content: Whether to return the full content of each step
289
+
290
+ Returns:
291
+ Tuple containing:
292
+ - final_outputs: Dictionary of workflow outputs
293
+ - computed_values: Dictionary of all computed values
294
+ - step_contents: Dictionary of step contents (if return_full_content=True)
295
+
296
+ Raises:
297
+ WorkflowError: If the workflow has more than one step
298
+ """
299
+ if len(workflow.steps) != 1:
300
+ raise WorkflowError("Simple workflow must have exactly one step")
301
+
302
+ # Get the single step
303
+ step = list(workflow.steps.values())[0]
304
+
305
+ # Validate inputs
306
+ for var in workflow.inputs:
307
+ if var not in input_values:
308
+ raise WorkflowError(f"Missing required workflow input: {var}")
309
+
310
+ # Execute the step
311
+ if return_full_content:
312
+ step_outputs, content = execute_model_step(step, input_values, return_full_content=True)
313
+ step_contents = {step.id: content}
314
+ else:
315
+ step_outputs = execute_model_step(step, input_values, return_full_content=False)
316
+ step_contents = {}
317
+
318
+ # Prepare the final outputs
319
+ final_outputs = {}
320
+ for target, var in workflow.outputs.items():
321
+ if var.startswith(f"{step.id}."):
322
+ output_key = var.split(".", 1)[1]
323
+ if output_key in step_outputs:
324
+ final_outputs[target] = step_outputs[output_key]
325
+ else:
326
+ raise WorkflowError(f"Workflow output variable {var} was not produced")
327
+ else:
328
+ raise WorkflowError(f"Invalid output mapping: {var} does not match step ID {step.id}")
329
+
330
+ # Prepare computed values (prefixed with step ID)
331
+ computed_values = input_values.copy()
332
+ computed_values.update({f"{step.id}.{k}": v for k, v in step_outputs.items()})
333
+
334
+ return final_outputs, computed_values, step_contents
335
+
336
+
337
+ def execute_workflow(
338
+ workflow: Workflow, input_values: dict[str, Any], return_full_content: bool = False
339
+ ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any]]:
340
+ if len(workflow.steps) > 1:
341
+ return execute_multi_step_workflow(workflow, input_values, return_full_content)
342
+ else:
343
+ return execute_simple_workflow(workflow, input_values, return_full_content)
344
 
345
 
346
  def run_examples():
 
483
  run_examples()
484
 
485
  # %%
486
+
487
+ # Example usage
488
+ if __name__ == "__main__":
489
+ # Define a simple model step
490
+ model_step = ModelStep(
491
+ id="step1",
492
+ model="gpt-4o-mini",
493
+ provider="OpenAI",
494
+ call_type="llm",
495
+ system_prompt="You are a simple NLP tool that takes a string, and a number N, and return the first N entities in the string, and the total count of entities in the string.",
496
+ input_fields=[
497
+ InputField(name="sentence", description="The sentence to process", variable="sentence", func=None),
498
+ InputField(name="n", description="The number of entities to return", variable="n", func=None),
499
+ ],
500
+ output_fields=[
501
+ OutputField(
502
+ name="entities",
503
+ description="The first N entities in the string as a list of strings",
504
+ type="list[str]",
505
+ func=None,
506
+ ),
507
+ OutputField(name="count", description="The total count of entities in the string", type="int", func=None),
508
+ ],
509
+ )
510
+
511
+ # Define processed inputs
512
+ processed_inputs = {"sentence": "Abdul Akbar is a good person, but Jesus is the son of God.", "n": 3}
513
+
514
+ # Execute the model step
515
+ outputs = execute_model_step(model_step, processed_inputs)
516
+ print(outputs)
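
A minimal sketch of the updated executor API (the three-tuple return and the `return_full_content` flag are taken from the diff above; the `workflow` object and its input variable name are assumed). `execute_workflow` now dispatches to `execute_simple_workflow` or `execute_multi_step_workflow` based on the step count and returns the final outputs, the per-step computed values, and the optional raw step contents:

from workflows.executors import execute_workflow

# Assumes `workflow` is a Workflow whose inputs include "question_text".
final_outputs, step_outputs, step_contents = execute_workflow(
    workflow,
    {"question_text": "For 10 points, name this author of Mrs. Dalloway."},
    return_full_content=True,  # populates step_contents with each step's raw model content
)

print(final_outputs)   # keys match workflow.outputs, e.g. {"answer": ..., "confidence": ...}
print(step_outputs)    # intermediate values, keyed as "<step_id>.<field>"
print(step_contents)   # raw per-step content keyed by step id; empty when return_full_content=False
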
src/workflows/qb/__init__.py DELETED
File without changes
src/workflows/qb/simple_agent.py DELETED
@@ -1,186 +0,0 @@
1
- import time
2
- from typing import Any, Iterable
3
-
4
- # from litellm import completion
5
- from llms import completion
6
- from workflows.executors import execute_model_step, execute_workflow
7
- from workflows.structs import ModelStep, Workflow
8
-
9
-
10
- def _get_agent_response(self, prompt: str, system_prompt: str) -> dict:
11
- """Get response from the LLM model."""
12
- messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}]
13
-
14
- start_time = time.time()
15
- response = completion(
16
- model=self.model,
17
- messages=messages,
18
- temperature=self.temperature,
19
- max_tokens=150, # Limit token usage for faster responses
20
- )
21
- response_time = time.time() - start_time
22
-
23
- return response, response_time
24
-
25
-
26
- def _get_model_step_response(
27
- model_step: ModelStep, available_vars: dict[str, Any]
28
- ) -> tuple[dict[str, Any], str, float]:
29
- """Get response from the LLM model."""
30
- start_time = time.time()
31
- response, content = execute_model_step(model_step, available_vars, return_full_content=True)
32
- response_time = time.time() - start_time
33
- return response, content, response_time
34
-
35
-
36
- class SimpleTossupAgent:
37
- external_input_variable = "question_text"
38
- output_variables = ["answer", "confidence"]
39
-
40
- def __init__(self, workflow: Workflow, buzz_threshold: float):
41
- steps = list(workflow.steps.values())
42
- assert len(steps) == 1, "Only one step is allowed in a simple workflow"
43
- self.model_step = steps[0]
44
- self.buzz_threshold = buzz_threshold
45
- self.output_variables = list(workflow.outputs.keys())
46
-
47
- if self.external_input_variable not in workflow.inputs:
48
- raise ValueError(f"External input variable {self.external_input_variable} not found in model step inputs")
49
-
50
- for out_var in self.output_variables:
51
- if out_var not in workflow.outputs:
52
- raise ValueError(f"Output variable {out_var} not found in the workflow outputs")
53
-
54
- def run(self, question_runs: list[str], early_stop: bool = True) -> Iterable[dict]:
55
- """
56
- Process a tossup question and decide when to buzz based on confidence.
57
-
58
- Args:
59
- question_runs: Progressive reveals of the question text
60
- early_stop: Whether to stop after the first buzz
61
-
62
- Yields:
63
- Dict with answer, confidence, and whether to buzz
64
- """
65
-
66
- for i, question_text in enumerate(question_runs):
67
- response, content, response_time = _get_model_step_response(
68
- self.model_step, {self.external_input_variable: question_text}
69
- )
70
- buzz = response["confidence"] >= self.buzz_threshold
71
- result = {
72
- "answer": response["answer"],
73
- "confidence": response["confidence"],
74
- "buzz": buzz,
75
- "question_fragment": question_text,
76
- "position": i + 1,
77
- "full_response": content,
78
- "response_time": response_time,
79
- }
80
-
81
- yield result
82
-
83
- # If we've reached the confidence threshold, buzz and stop
84
- if early_stop and buzz:
85
- return
86
-
87
-
88
- class SimpleBonusAgent:
89
- external_input_variables = ["leadin", "part"]
90
- output_variables = ["answer", "confidence", "explanation"]
91
-
92
- def __init__(self, workflow: Workflow):
93
- steps = list(workflow.steps.values())
94
- assert len(steps) == 1, "Only one step is allowed in a simple workflow"
95
- self.model_step = steps[0]
96
- self.output_variables = list(workflow.outputs.keys())
97
-
98
- # Validate input variables
99
- for input_var in self.external_input_variables:
100
- if input_var not in workflow.inputs:
101
- raise ValueError(f"External input variable {input_var} not found in model step inputs")
102
-
103
- # Validate output variables
104
- for out_var in self.output_variables:
105
- if out_var not in workflow.outputs:
106
- raise ValueError(f"Output variable {out_var} not found in the workflow outputs")
107
-
108
- def run(self, leadin: str, part: str) -> dict:
109
- """
110
- Process a bonus part with the given leadin.
111
-
112
- Args:
113
- leadin: The leadin text for the bonus question
114
- part: The specific part text to answer
115
-
116
- Returns:
117
- Dict with answer, confidence, and explanation
118
- """
119
- response, content, response_time = _get_model_step_response(
120
- self.model_step,
121
- {
122
- "leadin": leadin,
123
- "part": part,
124
- },
125
- )
126
-
127
- return {
128
- "answer": response["answer"],
129
- "confidence": response["confidence"],
130
- "explanation": response["explanation"],
131
- "full_response": content,
132
- "response_time": response_time,
133
- }
134
-
135
-
136
- # Example usage
137
- if __name__ == "__main__":
138
- # Load the Quizbowl dataset
139
- from datasets import load_dataset
140
-
141
- from workflows.factory import create_quizbowl_bonus_step_initial_setup, create_quizbowl_simple_step_initial_setup
142
-
143
- ds_name = "umdclip/leaderboard_co_set"
144
- ds = load_dataset(ds_name, split="train")
145
-
146
- # Create the agents
147
- tossup_step = create_quizbowl_simple_step_initial_setup()
148
- tossup_step.model = "gpt-4"
149
- tossup_step.provider = "openai"
150
- tossup_agent = SimpleTossupAgent(workflow=tossup_step, buzz_threshold=0.9)
151
-
152
- bonus_step = create_quizbowl_bonus_step_initial_setup()
153
- bonus_step.model = "gpt-4"
154
- bonus_step.provider = "openai"
155
- bonus_agent = SimpleBonusAgent(workflow=bonus_step)
156
-
157
- # Example for tossup mode
158
- print("\n=== TOSSUP MODE EXAMPLE ===")
159
- sample_question = ds[30]
160
- print(sample_question["question_runs"][-1])
161
- print(sample_question["gold_label"])
162
- print()
163
- question_runs = sample_question["question_runs"]
164
-
165
- results = tossup_agent.run(question_runs, early_stop=True)
166
- for result in results:
167
- print(result["full_response"])
168
- print(f"Guess at position {result['position']}: {result['answer']}")
169
- print(f"Confidence: {result['confidence']}")
170
- if result["buzz"]:
171
- print("Buzzed!\n")
172
-
173
- # Example for bonus mode
174
- print("\n=== BONUS MODE EXAMPLE ===")
175
- sample_bonus = ds[31] # Assuming this is a bonus question
176
- leadin = sample_bonus["leadin"]
177
- parts = sample_bonus["parts"]
178
-
179
- print(f"Leadin: {leadin}")
180
- for i, part in enumerate(parts):
181
- print(f"\nPart {i + 1}: {part['part']}")
182
- result = bonus_agent.run(leadin, part["part"])
183
- print(f"Answer: {result['answer']}")
184
- print(f"Confidence: {result['confidence']}")
185
- print(f"Explanation: {result['explanation']}")
186
- print(f"Response time: {result['response_time']:.2f}s")
 
 
src/workflows/{qb/multi_step_agent.py → qb_agents.py} RENAMED
@@ -5,15 +5,19 @@ from workflows.executors import execute_workflow
5
  from workflows.structs import Workflow
6
 
7
 
8
- def _get_workflow_response(workflow: Workflow, available_vars: dict[str, Any]) -> tuple[dict[str, Any], str, float]:
 
 
9
  """Get response from executing a complete workflow."""
10
  start_time = time.time()
11
- response, content = execute_workflow(workflow, available_vars, return_full_content=True)
 
 
12
  response_time = time.time() - start_time
13
- return response, content, response_time
14
 
15
 
16
- class MultiStepTossupAgent:
17
  """Agent for handling tossup questions with multiple steps in the workflow."""
18
 
19
  external_input_variable = "question_text"
@@ -53,26 +57,26 @@ class MultiStepTossupAgent:
53
  - buzz: Whether to buzz
54
  - question_fragment: Current question text
55
  - position: Current position in question
56
- - full_response: Complete model response
57
  - response_time: Time taken for response
58
  - step_outputs: Outputs from each step
59
  """
60
  for i, question_text in enumerate(question_runs):
61
  # Execute the complete workflow
62
- response, content, response_time = _get_workflow_response(
63
  self.workflow, {self.external_input_variable: question_text}
64
  )
65
-
66
- buzz = response["confidence"] >= self.buzz_threshold
67
  result = {
68
- "answer": response["answer"],
69
- "confidence": response["confidence"],
70
  "buzz": buzz,
71
  "question_fragment": question_text,
72
  "position": i + 1,
73
- "full_response": content,
74
  "response_time": response_time,
75
- "step_outputs": response.get("step_outputs", {}), # Include intermediate step outputs
76
  }
77
 
78
  yield result
@@ -82,7 +86,7 @@ class MultiStepTossupAgent:
82
  return
83
 
84
 
85
- class MultiStepBonusAgent:
86
  """Agent for handling bonus questions with multiple steps in the workflow."""
87
 
88
  external_input_variables = ["leadin", "part"]
@@ -119,11 +123,11 @@ class MultiStepBonusAgent:
119
  - answer: The model's answer
120
  - confidence: Confidence score
121
  - explanation: Explanation for the answer
122
- - full_response: Complete model response
123
  - response_time: Time taken for response
124
  - step_outputs: Outputs from each step
125
  """
126
- response, content, response_time = _get_workflow_response(
127
  self.workflow,
128
  {
129
  "leadin": leadin,
@@ -132,12 +136,12 @@ class MultiStepBonusAgent:
132
  )
133
 
134
  return {
135
- "answer": response["answer"],
136
- "confidence": response["confidence"],
137
- "explanation": response["explanation"],
138
- "full_response": content,
139
  "response_time": response_time,
140
- "step_outputs": response.get("step_outputs", {}), # Include intermediate step outputs
141
  }
142
 
143
 
@@ -153,10 +157,10 @@ if __name__ == "__main__":
153
 
154
  # Create the agents with multi-step workflows
155
  tossup_workflow = create_quizbowl_tossup_workflow()
156
- tossup_agent = MultiStepTossupAgent(workflow=tossup_workflow, buzz_threshold=0.9)
157
 
158
  bonus_workflow = create_quizbowl_bonus_workflow()
159
- bonus_agent = MultiStepBonusAgent(workflow=bonus_workflow)
160
 
161
  # Example for tossup mode
162
  print("\n=== TOSSUP MODE EXAMPLE ===")
@@ -168,7 +172,7 @@ if __name__ == "__main__":
168
 
169
  results = tossup_agent.run(question_runs, early_stop=True)
170
  for result in results:
171
- print(result["full_response"])
172
  print(f"Guess at position {result['position']}: {result['answer']}")
173
  print(f"Confidence: {result['confidence']}")
174
  print("Step outputs:", result["step_outputs"])
 
5
  from workflows.structs import Workflow
6
 
7
 
8
+ def _get_workflow_response(
9
+ workflow: Workflow, available_vars: dict[str, Any]
10
+ ) -> tuple[dict[str, Any], dict[str, Any], dict[str, Any], float]:
11
  """Get response from executing a complete workflow."""
12
  start_time = time.time()
13
+ final_outputs, computed_values, step_contents = execute_workflow(
14
+ workflow, available_vars, return_full_content=True
15
+ )
16
  response_time = time.time() - start_time
17
+ return final_outputs, computed_values, step_contents, response_time
18
 
19
 
20
+ class QuizBowlTossupAgent:
21
  """Agent for handling tossup questions with multiple steps in the workflow."""
22
 
23
  external_input_variable = "question_text"
 
57
  - buzz: Whether to buzz
58
  - question_fragment: Current question text
59
  - position: Current position in question
60
+ - step_contents: String content outputs of each step
61
  - response_time: Time taken for response
62
  - step_outputs: Outputs from each step
63
  """
64
  for i, question_text in enumerate(question_runs):
65
  # Execute the complete workflow
66
+ final_outputs, computed_values, step_contents, response_time = _get_workflow_response(
67
  self.workflow, {self.external_input_variable: question_text}
68
  )
69
+ print(f"Workflow response: {final_outputs}")
70
+ buzz = final_outputs["confidence"] >= self.buzz_threshold
71
  result = {
72
+ "answer": final_outputs["answer"],
73
+ "confidence": final_outputs["confidence"],
74
  "buzz": buzz,
75
  "question_fragment": question_text,
76
  "position": i + 1,
77
+ "step_contents": step_contents,
78
  "response_time": response_time,
79
+ "step_outputs": computed_values, # Include intermediate step outputs
80
  }
81
 
82
  yield result
 
86
  return
87
 
88
 
89
+ class QuizBowlBonusAgent:
90
  """Agent for handling bonus questions with multiple steps in the workflow."""
91
 
92
  external_input_variables = ["leadin", "part"]
 
123
  - answer: The model's answer
124
  - confidence: Confidence score
125
  - explanation: Explanation for the answer
126
+ - step_contents: String content outputs of each step
127
  - response_time: Time taken for response
128
  - step_outputs: Outputs from each step
129
  """
130
+ final_outputs, computed_values, step_contents, response_time = _get_workflow_response(
131
  self.workflow,
132
  {
133
  "leadin": leadin,
 
136
  )
137
 
138
  return {
139
+ "answer": final_outputs["answer"],
140
+ "confidence": final_outputs["confidence"],
141
+ "explanation": final_outputs["explanation"],
142
+ "step_contents": step_contents,
143
  "response_time": response_time,
144
+ "step_outputs": computed_values, # Include intermediate step outputs
145
  }
146
 
147
 
 
157
 
158
  # Create the agents with multi-step workflows
159
  tossup_workflow = create_quizbowl_tossup_workflow()
160
+ tossup_agent = QuizBowlTossupAgent(workflow=tossup_workflow, buzz_threshold=0.9)
161
 
162
  bonus_workflow = create_quizbowl_bonus_workflow()
163
+ bonus_agent = QuizBowlBonusAgent(workflow=bonus_workflow)
164
 
165
  # Example for tossup mode
166
  print("\n=== TOSSUP MODE EXAMPLE ===")
 
172
 
173
  results = tossup_agent.run(question_runs, early_stop=True)
174
  for result in results:
175
+ print(result["step_contents"])
176
  print(f"Guess at position {result['position']}: {result['answer']}")
177
  print(f"Confidence: {result['confidence']}")
178
  print("Step outputs:", result["step_outputs"])
src/workflows/quizbowl_agent.py DELETED
@@ -1,269 +0,0 @@
1
- # %%
2
- import json
3
- import os
4
- import time
5
- from typing import Dict, Iterable, List, Optional, Tuple, Union
6
-
7
- import litellm
8
- from datasets import load_dataset
9
- from litellm import completion
10
-
11
- litellm.drop_params = True
12
-
13
- # Set your API key - you can replace this with your actual key or use environment variables
14
- os.environ["OPENAI_API_KEY"] = (
15
- "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"
16
- )
17
-
18
- DEFAULT_SYS_PROMPT = """
19
- You are a Quizbowl expert. You will be given a question that's progressively revealed.
20
- Your goal is to identify the answer as quickly as possible with high confidence.
21
- Respond with a JSON object with two fields:
22
- 1. "answer": Your best guess for the answer
23
- 2. "confidence": Your confidence in your answer from 0.0 to 1.0
24
-
25
- DO NOT include any explanation. ONLY return the JSON object.
26
- """
27
-
28
-
29
- class QuizbowlAgent:
30
- """
31
- An agent for playing Quizbowl with two modes:
32
- 1. Tossup mode: Fast and direct with confidence calibration for buzzing
33
- 2. Bonus round mode: Provides guess, rationale, and confidence
34
- """
35
-
36
- def __init__(
37
- self,
38
- model: str = "gpt-4o-mini",
39
- buzz_threshold: float = 0.85,
40
- temperature: float = 0.2,
41
- system_prompt: str = DEFAULT_SYS_PROMPT,
42
- ):
43
- """
44
- Initialize the QuizbowlAgent.
45
-
46
- Args:
47
- model: The LLM model to use for answering
48
- buzz_threshold: Confidence threshold for buzzing in tossup mode (0-1)
49
- temperature: Temperature for model sampling
50
- """
51
- self.model = model
52
- self.buzz_threshold = buzz_threshold
53
- self.temperature = temperature
54
- self.system_prompt = system_prompt
55
-
56
- def _process_question_runs(self, question_runs: List[str]) -> List[str]:
57
- """Process question runs to extract increasing amounts of text."""
58
- # For simpler testing, just return the runs as they are in the dataset
59
- return question_runs
60
-
61
- def _get_agent_response(self, prompt: str, system_prompt: str) -> Dict:
62
- """Get response from the LLM model."""
63
- messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}]
64
-
65
- start_time = time.time()
66
- response = completion(
67
- model=self.model,
68
- messages=messages,
69
- temperature=self.temperature,
70
- max_tokens=150, # Limit token usage for faster responses
71
- )
72
- response_time = time.time() - start_time
73
-
74
- return response, response_time
75
-
76
- def _extract_confidence_and_answer(self, content: str) -> Tuple[str, float]:
77
- """Extract the answer and confidence score from the model response."""
78
- try:
79
- # Try to parse JSON from the response
80
- data = json.loads(content)
81
- answer = data.get("answer", "")
82
- confidence = float(data.get("confidence", 0.0))
83
- return answer, confidence
84
- except (json.JSONDecodeError, ValueError):
85
- # Fallback if parsing fails
86
- lines = content.strip().split("\n")
87
- answer = lines[0] if lines else ""
88
- confidence = 0.5 # Default confidence
89
-
90
- # Try to extract confidence from text
91
- for line in lines:
92
- if "confidence:" in line.lower():
93
- try:
94
- confidence = float(line.lower().split("confidence:")[1].strip())
95
- except (ValueError, IndexError):
96
- pass
97
-
98
- return answer, confidence
99
-
100
- def tossup_mode(self, question_runs: List[str]) -> Iterable[Dict]:
101
- """
102
- Process a tossup question and decide when to buzz based on confidence.
103
-
104
- Args:
105
- question_runs: Progressive reveals of the question text
106
-
107
- Yields:
108
- Dict with answer, confidence, and whether to buzz
109
- """
110
-
111
- for i, question_text in enumerate(question_runs):
112
- prompt = f"Question: {question_text}\n\nProvide your answer and confidence level:"
113
-
114
- response, response_time = self._get_agent_response(prompt, DEFAULT_SYS_PROMPT)
115
- content = response.choices[0].message.content
116
-
117
- answer, confidence = self._extract_confidence_and_answer(content)
118
-
119
- result = {
120
- "answer": answer,
121
- "confidence": confidence,
122
- "buzz": confidence >= self.buzz_threshold,
123
- "question_fragment": question_text,
124
- "position": i + 1,
125
- "full_response": content,
126
- "response_time": response_time,
127
- }
128
-
129
- yield result
130
-
131
- # If we've reached the confidence threshold, buzz and stop
132
- if confidence >= self.buzz_threshold:
133
- return
134
-
135
- def tossup_mode_top5(self, question_runs: List[str]) -> Iterable[Dict]:
136
- """
137
- Process a tossup question and provide the top 5 guesses with confidence levels.
138
-
139
- Args:
140
- question_runs: Progressive reveals of the question text
141
-
142
- Returns:
143
- Dict with top 5 answers, their confidences, and whether to buzz
144
- """
145
-
146
- for i, question_text in enumerate(question_runs):
147
- prompt = f"Question: {question_text}\n\nProvide your top 5 answers and confidence levels."
148
-
149
- response, response_time = self._get_agent_response(prompt, self.system_prompt)
150
- content = response.choices[0].message.content
151
-
152
- try:
153
- # Try to parse JSON from the response
154
- data = json.loads(content)
155
- guesses = data.get("guesses", [])
156
- except (json.JSONDecodeError, ValueError):
157
- # Fallback if parsing fails
158
- guesses = []
159
-
160
- result = {
161
- "guesses": guesses,
162
- "buzz": any(guess["confidence"] >= self.buzz_threshold for guess in guesses),
163
- "question_fragment": question_text,
164
- "position": i + 1,
165
- "full_response": content,
166
- "response_time": response_time,
167
- }
168
-
169
- yield result
170
-
171
- # If any guess reaches the confidence threshold, buzz and stop
172
- if result["buzz"]:
173
- return
174
-
175
- def bonus_round_mode(self, question: str) -> Dict:
176
- """
177
- Process a bonus round question with detailed analysis.
178
-
179
- Args:
180
- question: The bonus question text
181
-
182
- Returns:
183
- Dict with answer, rationale, and confidence
184
- """
185
- system_prompt = """
186
- You are a Quizbowl expert answering a bonus question. Provide:
187
- 1. Your direct answer
188
- 2. A very brief and crisp one line rationale for your answer (key clues that led to it)
189
- 3. Your confidence level (0.0-1.0)
190
-
191
- Respond with a JSON object with these three fields:
192
- {
193
- "answer": "Your answer here",
194
- "rationale": "Your reasoning here",
195
- "confidence": 0.XX
196
- }
197
- """
198
-
199
- prompt = f"Bonus Question: {question}\n\nProvide your answer, rationale, and confidence:"
200
-
201
- response = self._get_agent_response(prompt, system_prompt)
202
- content = response.choices[0].message.content
203
-
204
- try:
205
- # Try to parse JSON
206
- result = json.loads(content)
207
- # Ensure all fields are present
208
- if not all(k in result for k in ["answer", "rationale", "confidence"]):
209
- raise ValueError("Missing fields in response")
210
- except (json.JSONDecodeError, ValueError):
211
- # If parsing fails, extract manually
212
- lines = content.strip().split("\n")
213
- result = {"answer": "", "rationale": "", "confidence": 0.5}
214
-
215
- for line in lines:
216
- if line.lower().startswith("answer:"):
217
- result["answer"] = line[7:].strip()
218
- elif line.lower().startswith("rationale:"):
219
- result["rationale"] = line[10:].strip()
220
- elif line.lower().startswith("confidence:"):
221
- try:
222
- result["confidence"] = float(line[11:].strip())
223
- except ValueError:
224
- pass
225
-
226
- return result
227
-
228
-
229
- # %%
230
- # Example usage
231
- if __name__ == "__main__":
232
- # Load the Quizbowl dataset
233
- ds_name = "umdclip/leaderboard_co_set"
234
- ds = load_dataset(ds_name, split="train")
235
-
236
- # Create the agent
237
- agent = QuizbowlAgent(model="gpt-4-turbo", buzz_threshold=0.85)
238
-
239
- # Example for tossup mode
240
- print("\n=== TOSSUP MODE EXAMPLE ===")
241
- sample_question = ds[0]
242
- print(sample_question["question_runs"][-1])
243
- print(sample_question["gold_label"])
244
- question_runs = sample_question["question_runs"]
245
-
246
- results = agent.tossup_mode(question_runs)
247
- for result in results:
248
- print(f"Guess at position {result['position']}: {result['answer']}")
249
- print(f"Confidence: {result['confidence']}")
250
- if result["buzz"]:
251
- print("Buzzed!\n")
252
-
253
- results = agent.tossup_mode_top5(question_runs)
254
- for result in results:
255
- guesses = [f"{guess['answer']} ({guess['confidence']})" for guess in result["guesses"]]
256
- print(f"Guesses at position {result['position']}: {', '.join(guesses)}")
257
- if result["buzz"]:
258
- print("Buzzed!")
259
-
260
- # Example for bonus round mode
261
- print("\n=== BONUS ROUND MODE EXAMPLE ===")
262
- bonus_question = sample_question["question_runs"][-1]
263
-
264
- bonus_result = agent.bonus_round_mode(bonus_question)
265
- print(f"Answer: {bonus_result['answer']}")
266
- print(f"Rationale: {bonus_result['rationale']}")
267
- print(f"Confidence: {bonus_result['confidence']}")
268
-
269
- # %%
 
 
 
 
src/workflows/structs.py CHANGED
@@ -202,9 +202,13 @@ class Workflow(BaseModel):
202
  if "steps" in data and isinstance(data["steps"], list):
203
  steps_dict = {}
204
  for step in data["steps"]:
205
- if step["id"] in steps_dict:
206
- raise ValueError(f"Duplicate step ID: {step['id']}")
207
- steps_dict[step["id"]] = step
208
  data["steps"] = steps_dict
209
  return data
210
 
 
202
  if "steps" in data and isinstance(data["steps"], list):
203
  steps_dict = {}
204
  for step in data["steps"]:
205
+ if isinstance(step, ModelStep):
206
+ step_id = step.id
207
+ else:
208
+ step_id = step["id"]
209
+ if step_id in steps_dict:
210
+ raise ValueError(f"Duplicate step ID: {step_id}")
211
+ steps_dict[step_id] = step
212
  data["steps"] = steps_dict
213
  return data
214
 
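A small sketch of what the structs.py change enables (field values are illustrative, mirroring the updated tests below): the list-to-dict steps validator now accepts ModelStep instances as well as raw dicts, so a Workflow can be built directly from a list of steps.

from workflows.structs import ModelStep, Workflow

step = ModelStep(
    id="test",
    name="Test Step",
    model="gpt-3.5-turbo",
    provider="openai",
    call_type="llm",
    system_prompt="Test prompt",  # illustrative value
    input_fields=[],
    output_fields=[],
)

# The list form is normalized to {"test": step}; duplicate step ids still raise ValueError.
workflow = Workflow(steps=[step], inputs=[], outputs={})
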
tests/test_executors.py CHANGED
@@ -45,6 +45,7 @@ def test_create_processed_inputs_basic():
45
  """Test basic input processing without transformations."""
46
  step = ModelStep(
47
  id="test_step",
 
48
  model="gpt-4",
49
  provider="openai",
50
  call_type="llm",
@@ -62,6 +63,7 @@ def test_create_processed_inputs_with_transformation():
62
  """Test input processing with transformation functions."""
63
  step = ModelStep(
64
  id="test_step",
 
65
  model="gpt-4",
66
  provider="openai",
67
  call_type="llm",
@@ -82,6 +84,7 @@ def test_create_processed_inputs_missing_var():
82
  """Test that appropriate error is raised when a variable is missing."""
83
  step = ModelStep(
84
  id="test_step",
 
85
  model="gpt-4",
86
  provider="openai",
87
  call_type="llm",
@@ -99,6 +102,7 @@ def test_create_processed_inputs_unknown_func():
99
  """Test that appropriate error is raised when an unknown function is specified."""
100
  step = ModelStep(
101
  id="test_step",
 
102
  model="gpt-4",
103
  provider="openai",
104
  call_type="llm",
@@ -116,18 +120,22 @@ def test_create_processed_inputs_unknown_func():
116
  # Tests for execute_model_step
117
 
118
 
119
- @patch("workflows.executors.litellm.completion")
120
  def test_execute_model_step_success(mock_completion):
121
  """Test successful execution of a model step with mocked litellm response."""
122
  # Mock the litellm response
123
- mock_response = {"choices": [{"message": {"content": json.dumps({"summary": "This is a summary"})}}]}
 
 
 
124
  mock_completion.return_value = mock_response
125
 
126
  # Create a test step
127
  step = ModelStep(
128
  id="summarize",
 
129
  model="gpt-3.5-turbo",
130
- provider="openai",
131
  call_type="llm",
132
  system_prompt="Summarize the text",
133
  input_fields=[InputField(name="text", description="Text to summarize", variable="input_text")],
@@ -143,11 +151,11 @@ def test_execute_model_step_success(mock_completion):
143
  # Verify the litellm call was made correctly
144
  mock_completion.assert_called_once()
145
  args, kwargs = mock_completion.call_args
146
- assert kwargs["model"] == "gpt-3.5-turbo"
147
- assert "Summarize the text" in kwargs["messages"][0]["content"]
148
 
149
 
150
- @patch("workflows.executors.litellm.completion")
151
  def test_execute_model_step_error(mock_completion):
152
  """Test handling of errors in model step execution."""
153
  # Make litellm raise an exception
@@ -156,6 +164,7 @@ def test_execute_model_step_error(mock_completion):
156
  # Create a test step
157
  step = ModelStep(
158
  id="summarize",
 
159
  model="gpt-3.5-turbo",
160
  provider="openai",
161
  call_type="llm",
@@ -181,6 +190,7 @@ def test_execute_workflow_simple(mock_execute_step):
181
  # Create a simple workflow
182
  step = ModelStep(
183
  id="summarize",
 
184
  model="gpt-3.5-turbo",
185
  provider="openai",
186
  call_type="llm",
@@ -192,10 +202,14 @@ def test_execute_workflow_simple(mock_execute_step):
192
  workflow = Workflow(steps={"summarize": step}, inputs=["input_text"], outputs={"summary": "summarize.summary"})
193
 
194
  # Execute the workflow
195
- result = execute_workflow(workflow, {"input_text": "Long text to be summarized..."})
 
 
196
 
197
  # Verify the results
198
- assert result == {"summary": "This is a summary"}
 
 
199
 
200
  # Verify execute_model_step was called correctly
201
  mock_execute_step.assert_called_once()
@@ -206,7 +220,7 @@ def test_execute_workflow_multi_step(mock_execute_step):
206
  """Test execution of a multi-step workflow with dependencies."""
207
 
208
  # Configure mock to return different values based on the step
209
- def side_effect(step, available_vars):
210
  if step.id == "extract":
211
  return {"entities": ["Apple", "product"]}
212
  elif step.id == "analyze":
@@ -218,6 +232,7 @@ def test_execute_workflow_multi_step(mock_execute_step):
218
  # Create extract step
219
  extract_step = ModelStep(
220
  id="extract",
 
221
  model="gpt-3.5-turbo",
222
  provider="openai",
223
  call_type="llm",
@@ -229,6 +244,7 @@ def test_execute_workflow_multi_step(mock_execute_step):
229
  # Create analyze step that depends on extract step
230
  analyze_step = ModelStep(
231
  id="analyze",
 
232
  model="gpt-4",
233
  provider="openai",
234
  call_type="llm",
@@ -244,10 +260,18 @@ def test_execute_workflow_multi_step(mock_execute_step):
244
  )
245
 
246
  # Execute the workflow
247
- result = execute_workflow(workflow, {"input_text": "Apple is launching a new product tomorrow."})
 
 
248
 
249
  # Verify the results
250
- assert result == {"entities": ["Apple", "product"], "sentiment": "positive"}
251
 
252
  # Verify execute_model_step was called twice (once for each step)
253
  assert mock_execute_step.call_count == 2
@@ -257,6 +281,7 @@ def test_execute_workflow_missing_input():
257
  """Test that an error is raised when a required input is missing."""
258
  step = ModelStep(
259
  id="summarize",
 
260
  model="gpt-3.5-turbo",
261
  provider="openai",
262
  call_type="llm",
@@ -280,6 +305,7 @@ def test_execute_workflow_cyclic_dependency(mock_dependency_graph):
280
 
281
  step = ModelStep(
282
  id="test",
 
283
  model="gpt-3.5-turbo",
284
  provider="openai",
285
  call_type="llm",
@@ -288,8 +314,42 @@ def test_execute_workflow_cyclic_dependency(mock_dependency_graph):
288
  output_fields=[],
289
  )
290
 
291
- workflow = Workflow(steps={"test": step}, inputs=[], outputs=[])
292
 
293
  # This should propagate the CyclicDependencyError
294
  with pytest.raises(CyclicDependencyError):
295
  execute_workflow(workflow, {})
 
 
 
45
  """Test basic input processing without transformations."""
46
  step = ModelStep(
47
  id="test_step",
48
+ name="Test Step",
49
  model="gpt-4",
50
  provider="openai",
51
  call_type="llm",
 
63
  """Test input processing with transformation functions."""
64
  step = ModelStep(
65
  id="test_step",
66
+ name="Test Step",
67
  model="gpt-4",
68
  provider="openai",
69
  call_type="llm",
 
84
  """Test that appropriate error is raised when a variable is missing."""
85
  step = ModelStep(
86
  id="test_step",
87
+ name="Test Step",
88
  model="gpt-4",
89
  provider="openai",
90
  call_type="llm",
 
102
  """Test that appropriate error is raised when an unknown function is specified."""
103
  step = ModelStep(
104
  id="test_step",
105
+ name="Test Step",
106
  model="gpt-4",
107
  provider="openai",
108
  call_type="llm",
 
120
  # Tests for execute_model_step
121
 
122
 
123
+ @patch("workflows.executors.completion")
124
  def test_execute_model_step_success(mock_completion):
125
  """Test successful execution of a model step with mocked litellm response."""
126
  # Mock the litellm response
127
+ mock_response = {
128
+ "content": json.dumps({"summary": "This is a summary"}),
129
+ "output": {"summary": "This is a summary"},
130
+ }
131
  mock_completion.return_value = mock_response
132
 
133
  # Create a test step
134
  step = ModelStep(
135
  id="summarize",
136
+ name="Summarize Text",
137
  model="gpt-3.5-turbo",
138
+ provider="OpenAI",
139
  call_type="llm",
140
  system_prompt="Summarize the text",
141
  input_fields=[InputField(name="text", description="Text to summarize", variable="input_text")],
 
151
  # Verify the litellm call was made correctly
152
  mock_completion.assert_called_once()
153
  args, kwargs = mock_completion.call_args
154
+ assert kwargs["model"] == "OpenAI/gpt-3.5-turbo"
155
+ assert "Summarize the text" in kwargs["system"]
156
 
157
 
158
+ @patch("workflows.executors.completion")
159
  def test_execute_model_step_error(mock_completion):
160
  """Test handling of errors in model step execution."""
161
  # Make litellm raise an exception
 
164
  # Create a test step
165
  step = ModelStep(
166
  id="summarize",
167
+ name="Summarize Text",
168
  model="gpt-3.5-turbo",
169
  provider="openai",
170
  call_type="llm",
 
190
  # Create a simple workflow
191
  step = ModelStep(
192
  id="summarize",
193
+ name="Summarize Text",
194
  model="gpt-3.5-turbo",
195
  provider="openai",
196
  call_type="llm",
 
202
  workflow = Workflow(steps={"summarize": step}, inputs=["input_text"], outputs={"summary": "summarize.summary"})
203
 
204
  # Execute the workflow
205
+ final_outputs, computed_values, step_contents = execute_workflow(
206
+ workflow, {"input_text": "Long text to be summarized..."}
207
+ )
208
 
209
  # Verify the results
210
+ assert final_outputs == {"summary": "This is a summary"}
211
+ assert computed_values == {"input_text": "Long text to be summarized...", "summarize.summary": "This is a summary"}
212
+ assert step_contents == {}
213
 
214
  # Verify execute_model_step was called correctly
215
  mock_execute_step.assert_called_once()
 
220
  """Test execution of a multi-step workflow with dependencies."""
221
 
222
  # Configure mock to return different values based on the step
223
+ def side_effect(step, available_vars, return_full_content=False):
224
  if step.id == "extract":
225
  return {"entities": ["Apple", "product"]}
226
  elif step.id == "analyze":
 
232
  # Create extract step
233
  extract_step = ModelStep(
234
  id="extract",
235
+ name="Extract Entities",
236
  model="gpt-3.5-turbo",
237
  provider="openai",
238
  call_type="llm",
 
244
  # Create analyze step that depends on extract step
245
  analyze_step = ModelStep(
246
  id="analyze",
247
+ name="Analyze Sentiment",
248
  model="gpt-4",
249
  provider="openai",
250
  call_type="llm",
 
260
  )
261
 
262
  # Execute the workflow
263
+ final_outputs, computed_values, step_contents = execute_workflow(
264
+ workflow, {"input_text": "Apple is launching a new product tomorrow."}
265
+ )
266
 
267
  # Verify the results
268
+ assert final_outputs == {"entities": ["Apple", "product"], "sentiment": "positive"}
269
+ assert computed_values == {
270
+ "input_text": "Apple is launching a new product tomorrow.",
271
+ "extract.entities": ["Apple", "product"],
272
+ "analyze.sentiment": "positive",
273
+ }
274
+ assert step_contents == {}
275
 
276
  # Verify execute_model_step was called twice (once for each step)
277
  assert mock_execute_step.call_count == 2
 
281
  """Test that an error is raised when a required input is missing."""
282
  step = ModelStep(
283
  id="summarize",
284
+ name="Summarize Text",
285
  model="gpt-3.5-turbo",
286
  provider="openai",
287
  call_type="llm",
 
305
 
306
  step = ModelStep(
307
  id="test",
308
+ name="Test Step",
309
  model="gpt-3.5-turbo",
310
  provider="openai",
311
  call_type="llm",
 
314
  output_fields=[],
315
  )
316
 
317
+ workflow = Workflow(steps=[step], inputs=[], outputs={})
318
 
319
  # This should propagate the CyclicDependencyError
320
  with pytest.raises(CyclicDependencyError):
321
  execute_workflow(workflow, {})
322
+
323
+
324
+ @patch("workflows.executors.execute_model_step")
325
+ def test_execute_workflow_with_full_content(mock_execute_step):
326
+ """Test execution of a workflow with return_full_content=True."""
327
+ # Configure mock to return expected outputs and content
328
+ mock_execute_step.return_value = ({"summary": "This is a summary"}, "Full model response content")
329
+
330
+ # Create a simple workflow
331
+ step = ModelStep(
332
+ id="summarize",
333
+ name="Summarize Text",
334
+ model="gpt-3.5-turbo",
335
+ provider="openai",
336
+ call_type="llm",
337
+ system_prompt="Summarize the text",
338
+ input_fields=[InputField(name="text", description="Text to summarize", variable="input_text")],
339
+ output_fields=[OutputField(name="summary", description="Summary of the text", type="str")],
340
+ )
341
+
342
+ workflow = Workflow(steps=[step], inputs=["input_text"], outputs={"summary": "summarize.summary"})
343
+
344
+ # Execute the workflow with return_full_content=True
345
+ final_outputs, computed_values, step_contents = execute_workflow(
346
+ workflow, {"input_text": "Long text to be summarized..."}, return_full_content=True
347
+ )
348
+
349
+ # Verify the results
350
+ assert final_outputs == {"summary": "This is a summary"}
351
+ assert computed_values == {"input_text": "Long text to be summarized...", "summarize.summary": "This is a summary"}
352
+ assert step_contents == {"summarize": "Full model response content"}
353
+
354
+ # Verify execute_model_step was called correctly with return_full_content=True
355
+ mock_execute_step.assert_called_once_with(step, computed_values, return_full_content=True)