Maharshi Gor committed · d0ae1a9
Parent(s): eaa5563

Bugfix `logprob` in workflows.
Files changed:
- src/workflows/configs.py    +5 -0
- src/workflows/executors.py  +1 -1
- src/workflows/llms.py       +8 -4
- src/workflows/qb_agents.py  +10 -4
- src/workflows/structs.py    +34 -2
- src/workflows/utils.py      +1 -1
src/workflows/configs.py
CHANGED
@@ -8,9 +8,11 @@ including model configurations, workflow settings, and other package-wide consta
 AVAILABLE_MODELS = {
     "OpenAI/gpt-4o": {
         "model": "gpt-4o-2024-11-20",
+        "logprobs": True,
     },
     "OpenAI/gpt-4o-mini": {
         "model": "gpt-4o-mini-2024-07-18",
+        "logprobs": True,
     },
     "OpenAI/gpt-3.5-turbo": {
         "model": "gpt-3.5-turbo-0125",
@@ -26,12 +28,15 @@ AVAILABLE_MODELS = {
     },
     "Cohere/command-r": {
         "model": "command-r-08-2024",
+        "logprobs": True,
     },
     "Cohere/command-r-plus": {
         "model": "command-r-plus-08-2024",
+        "logprobs": True,
     },
     "Cohere/command-r7b": {
         "model": "command-r7b-12-2024",
+        "logprobs": False,
     },
 }
 
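Each model entry now carries a `logprobs` capability flag; a missing or `False` flag (as with `command-r7b`) means token logprobs are unavailable. A minimal sketch of how a caller might consult the flag, assuming the package is importable as `workflows`; the helper `supports_logprobs` is illustrative, not part of this commit:

    from workflows.configs import AVAILABLE_MODELS

    def supports_logprobs(model: str) -> bool:
        # Unknown models and entries without the flag both read as False.
        return AVAILABLE_MODELS.get(model, {}).get("logprobs", False)

    assert supports_logprobs("OpenAI/gpt-4o")           # flag set to True above
    assert not supports_logprobs("Cohere/command-r7b")  # flag explicitly False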
src/workflows/executors.py
CHANGED
@@ -231,7 +231,7 @@ def execute_model_step(
     if return_full_content:
         result["content"] = api_response["content"]
     if logprobs:
-        result["logprob"] = api_response
+        result["logprob"] = api_response.get("logprob")
     return result
 
 
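The fixed line uses `dict.get`, so a response without token logprobs yields `None` rather than a `KeyError` or a wrong value. A toy illustration, with `api_response` assumed to be a plain dict of made-up shape:

    api_response = {"content": "Paris", "output": {"answer": "Paris"}}  # made-up shape
    result = {}
    # .get() returns the stored logprob when present and None otherwise.
    result["logprob"] = api_response.get("logprob")
    assert result["logprob"] is None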
src/workflows/llms.py
CHANGED
@@ -129,8 +129,6 @@ def _llm_completion(
     Raises:
         ValueError: If logprobs=True with Anthropic models
     """
-    if model not in AVAILABLE_MODELS:
-        raise ValueError(f"Model {model} not supported")
     model_name = AVAILABLE_MODELS[model]["model"]
     provider = model.split("/")[0]
     if provider == "Cohere":
@@ -173,13 +171,19 @@ def completion(
     Raises:
         ValueError: If logprobs=True with Anthropic models
     """
+    if model not in AVAILABLE_MODELS:
+        raise ValueError(f"Model {model} not supported")
+    if logprobs and not AVAILABLE_MODELS[model].get("logprobs", False):
+        logger.warning(f"{model} does not support logprobs feature, setting logprobs to False")
+        logprobs = False
+
     # Check cache first
     cached_response = llm_cache.get(model, system, prompt, response_format, temperature)
-    if cached_response:
+    if cached_response and (not logprobs or cached_response.get("logprob")):
         logger.info(f"Cache hit for model {model}")
         return cached_response
 
-    logger.info(f"Cache miss for model {model}, calling API")
+    logger.info(f"Cache miss for model {model}, calling API. Logprobs: {logprobs}")
 
     # Continue with the original implementation for cache miss
     response = _llm_completion(model, system, prompt, response_format, temperature, logprobs)
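The model check moves from `_llm_completion` up into `completion`, which now also downgrades `logprobs` requests for models that cannot honor them and refuses cache hits that lack a stored logprob. A sketch of that cache gate as a standalone predicate (the function name is illustrative):

    def cache_hit_ok(cached_response: dict | None, logprobs: bool) -> bool:
        # Mirrors the condition in completion(): reuse a cached entry unless
        # the caller wants logprobs and the entry was cached without one.
        return bool(cached_response and (not logprobs or cached_response.get("logprob")))

    assert cache_hit_ok({"content": "hi"}, logprobs=False)     # plain hit
    assert not cache_hit_ok({"content": "hi"}, logprobs=True)  # forces a re-call
    assert cache_hit_ok({"content": "hi", "logprob": -0.2}, logprobs=True)

One edge case worth noting: a cached logprob of exactly 0.0 is falsy and would also force a re-call, though that only occurs for probability-1 tokens.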
src/workflows/qb_agents.py
CHANGED
@@ -1,14 +1,18 @@
 import time
 from typing import Any, Iterable, TypedDict
 
+from loguru import logger
+
 from .executors import WorkflowOutput, execute_workflow
 from .structs import TossupWorkflow, Workflow
 
 
-def _get_workflow_response(workflow: Workflow, available_vars: dict[str, Any]) -> tuple[WorkflowOutput, float]:
+def _get_workflow_response(
+    workflow: Workflow, available_vars: dict[str, Any], logprob_step: bool | str = False
+) -> tuple[WorkflowOutput, float]:
     """Get response from executing a complete workflow."""
     start_time = time.time()
-    workflow_output = execute_workflow(workflow, available_vars, return_full_content=True)
+    workflow_output = execute_workflow(workflow, available_vars, return_full_content=True, logprob_step=logprob_step)
     response_time = time.time() - start_time
     return workflow_output, response_time
 
@@ -78,15 +82,17 @@ class QuizBowlTossupAgent:
         """
         for i, question_text in enumerate(question_runs):
             # Execute the complete workflow
+            answer_var_step = self.workflow.outputs["answer"].split(".")[0]
             workflow_output, response_time = _get_workflow_response(
-                self.workflow, {self.external_input_variable: question_text}
+                self.workflow, {self.external_input_variable: question_text}, logprob_step=answer_var_step
             )
             final_outputs = workflow_output["final_outputs"]
-            buzz = self.workflow.buzzer.run(final_outputs["confidence"], logprob=
+            buzz = self.workflow.buzzer.run(final_outputs["confidence"], logprob=workflow_output["logprob"])
             result: TossupResult = {
                 "position": i + 1,
                 "answer": final_outputs["answer"],
                 "confidence": final_outputs["confidence"],
+                "logprob": workflow_output["logprob"],
                 "buzz": buzz,
                 "question_fragment": question_text,
                 "step_contents": workflow_output["step_contents"],
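The agent now asks the executor to collect logprobs only for the step that produces the answer: the step id is the prefix of the output variable reference, before the first dot. A small illustration with a made-up output mapping:

    # outputs maps an output name to "<step_id>.<variable>"
    outputs = {"answer": "generate.answer", "confidence": "judge.confidence"}
    answer_var_step = outputs["answer"].split(".")[0]
    assert answer_var_step == "generate"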
src/workflows/structs.py
CHANGED
@@ -6,6 +6,8 @@ from typing import Any, Literal, Optional
 import numpy as np
 from pydantic import BaseModel, Field, model_validator
 
+from .configs import AVAILABLE_MODELS
+
 """
 Core data structures for defining workflows and their components.
 
@@ -259,6 +261,13 @@ class Workflow(BaseModel):
         """Get all model selections for all steps."""
         return {step_id: step.get_full_model_name() for step_id, step in self.steps.items()}
 
+    def get_output_model_selections(self) -> dict[str, str]:
+        """Get all output model selections for all steps."""
+        return {
+            output_var: target_var.split(".")[0] if target_var else None
+            for output_var, target_var in self.outputs.items()
+        }
+
     # Step update method
 
     def add_step(self, step: ModelStep) -> "Workflow":
@@ -305,14 +314,19 @@ class Buzzer(BaseModel):
         use_enum_values = True
         frozen = True
 
+    def update(self, **kwargs) -> "Buzzer":
+        """Update the buzzer with the given kwargs."""
+        return self.model_copy(update=kwargs)
+
     def run(self, confidence: float, prob: float | None = None, logprob: float | None = None) -> bool:
         """Run the buzzer logic."""
         if logprob is not None and prob is not None:
             raise ValueError("Cannot provide both logprob and prob")
-        if logprob is not None:
-            prob = np.exp(logprob)
         if self.prob_threshold is None:
             return confidence >= self.confidence_threshold
+        if logprob is None and prob is None:
+            raise ValueError("Must provide either logprob or prob if prob_threshold is not None")
+        prob = prob or float(np.exp(logprob))
         if self.method == BuzzerMethod.AND:
             return confidence >= self.confidence_threshold and prob >= self.prob_threshold
         elif self.method == BuzzerMethod.OR:
@@ -333,6 +347,24 @@ class TossupWorkflow(Workflow):
 
     buzzer: Buzzer
 
+    def get_answer_model(self, answer_var: str | None = None) -> str | None:
+        answer_var = answer_var or self.outputs["answer"]
+        if answer_var is None:
+            return None
+        step_id = answer_var.split(".")[0]
+        return self.steps[step_id].get_full_model_name()
+
+    def is_token_probs_supported(self, answer_var: str | None = None) -> bool:
+        model_name = self.get_answer_model(answer_var)
+        if model_name is None:
+            return True
+        return AVAILABLE_MODELS[model_name].get("logprobs", False)
+
     def update_buzzer(self, buzzer: Buzzer) -> "TossupWorkflow":
         """Update the buzzer."""
         return self.model_copy(update={"buzzer": buzzer})
+
+    def refresh_buzzer(self) -> "TossupWorkflow":
+        if not self.is_token_probs_supported():
+            return self.update_buzzer(self.buzzer.update(prob_threshold=None, method="AND"))
+        return self
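`Buzzer.run` now raises when a probability threshold is set but neither `prob` nor `logprob` is supplied, and `TossupWorkflow.refresh_buzzer` drops the probability gate entirely when the answering model cannot report token logprobs. A sketch of the threshold arithmetic in isolation, with made-up values:

    import numpy as np

    confidence_threshold, prob_threshold = 0.8, 0.5  # illustrative thresholds
    confidence, logprob = 0.9, -0.3

    # run() computes prob = prob or float(np.exp(logprob)); exp(-0.3) ~= 0.741,
    # so under the AND method both gates pass and the agent buzzes.
    prob = float(np.exp(logprob))
    assert confidence >= confidence_threshold and prob >= prob_threshold

One subtlety: `prob or float(np.exp(logprob))` treats an explicit `prob=0.0` as missing and falls back to the logprob (a `TypeError` if `logprob` is `None`); since `run` already rejects supplying both, this only bites for a literal zero probability.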
src/workflows/utils.py
CHANGED
@@ -168,7 +168,7 @@ def topological_sort(dependencies: dict[str, set[str]]) -> list[str]:
 
     nodes = list(dependencies.keys())
     dependents: dict[str, list[str]] = {node: [] for node in nodes}
-    in_degree: dict[str, int] =
+    in_degree: dict[str, int] = dict.fromkeys(nodes, 0)
 
     # Calculate in-degrees and build dependents list
     for node, deps in dependencies.items():
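`dict.fromkeys(nodes, 0)` seeds every node's in-degree at zero in one call, which is safe here because the shared value is an immutable int. A tiny illustration of the initialization against a made-up dependency map (the counting loop is a simplification of the function body):

    dependencies = {"a": set(), "b": {"a"}, "c": {"a", "b"}}
    nodes = list(dependencies.keys())
    in_degree = dict.fromkeys(nodes, 0)  # {"a": 0, "b": 0, "c": 0}
    for node, deps in dependencies.items():
        in_degree[node] += len(deps)
    assert in_degree == {"a": 0, "b": 1, "c": 2}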