Maharshi Gor committed
Commit d0ae1a9 · 1 Parent(s): eaa5563

Bugfix `logprob` in workflows.

src/workflows/configs.py CHANGED
@@ -8,9 +8,11 @@ including model configurations, workflow settings, and other package-wide constants
 AVAILABLE_MODELS = {
     "OpenAI/gpt-4o": {
         "model": "gpt-4o-2024-11-20",
+        "logprobs": True,
     },
     "OpenAI/gpt-4o-mini": {
         "model": "gpt-4o-mini-2024-07-18",
+        "logprobs": True,
     },
     "OpenAI/gpt-3.5-turbo": {
         "model": "gpt-3.5-turbo-0125",
@@ -26,12 +28,15 @@ AVAILABLE_MODELS = {
     },
     "Cohere/command-r": {
         "model": "command-r-08-2024",
+        "logprobs": True,
     },
     "Cohere/command-r-plus": {
         "model": "command-r-plus-08-2024",
+        "logprobs": True,
     },
     "Cohere/command-r7b": {
         "model": "command-r7b-12-2024",
+        "logprobs": False,
     },
 }
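
Each model entry now carries a `logprobs` capability flag alongside the pinned model id; only `Cohere/command-r7b` is marked unsupported. A minimal sketch of how downstream code can gate on the flag (the `supports_logprobs` helper is illustrative, not part of this commit):

    # Abridged copy of the table above.
    AVAILABLE_MODELS = {
        "OpenAI/gpt-4o": {"model": "gpt-4o-2024-11-20", "logprobs": True},
        "Cohere/command-r7b": {"model": "command-r7b-12-2024", "logprobs": False},
    }

    def supports_logprobs(model: str) -> bool:
        # Missing flags default to False, so unflagged models never request logprobs.
        return AVAILABLE_MODELS.get(model, {}).get("logprobs", False)

    assert supports_logprobs("OpenAI/gpt-4o")
    assert not supports_logprobs("Cohere/command-r7b")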
src/workflows/executors.py CHANGED
@@ -231,7 +231,7 @@ def execute_model_step(
     if return_full_content:
         result["content"] = api_response["content"]
     if logprobs:
-        result["logprob"] = api_response["log_prob"]
+        result["logprob"] = api_response.get("logprob")
     return result
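
The old key `"log_prob"` does not match what the completion layer stores, so requesting logprobs raised a `KeyError`; `dict.get` returns `None` instead when the key is absent. A toy illustration (the response dict shape here is assumed for the example):

    api_response = {"content": "...", "output": {"answer": "..."}}  # no logprob key

    # Old lookup: raises KeyError because the key is misspelled / absent.
    try:
        api_response["log_prob"]
    except KeyError:
        pass

    # New lookup: degrades gracefully to None.
    assert api_response.get("logprob") is None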
src/workflows/llms.py CHANGED
@@ -129,8 +129,6 @@ def _llm_completion(
     Raises:
         ValueError: If logprobs=True with Anthropic models
     """
-    if model not in AVAILABLE_MODELS:
-        raise ValueError(f"Model {model} not supported")
     model_name = AVAILABLE_MODELS[model]["model"]
     provider = model.split("/")[0]
     if provider == "Cohere":
@@ -173,13 +171,19 @@ def completion(
     Raises:
         ValueError: If logprobs=True with Anthropic models
     """
+    if model not in AVAILABLE_MODELS:
+        raise ValueError(f"Model {model} not supported")
+    if logprobs and not AVAILABLE_MODELS[model].get("logprobs", False):
+        logger.warning(f"{model} does not support logprobs feature, setting logprobs to False")
+        logprobs = False
+
     # Check cache first
     cached_response = llm_cache.get(model, system, prompt, response_format, temperature)
-    if cached_response is not None:
+    if cached_response and (not logprobs or cached_response.get("logprob")):
         logger.info(f"Cache hit for model {model}")
         return cached_response
 
-    logger.info(f"Cache miss for model {model}, calling API")
+    logger.info(f"Cache miss for model {model}, calling API. Logprobs: {logprobs}")
 
     # Continue with the original implementation for cache miss
     response = _llm_completion(model, system, prompt, response_format, temperature, logprobs)
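
Two behavioral changes here: the unsupported-model check moves from the private `_llm_completion` up into `completion`, where it also downgrades `logprobs` for models whose config flag is False; and a cached entry now only counts as a hit if it can satisfy the logprobs request. A sketch of the new cache-hit rule (the helper name is illustrative):

    def is_cache_hit(cached: dict | None, logprobs: bool) -> bool:
        # Mirrors the new condition: entries cached before this fix carry no
        # "logprob" key, so they are treated as misses when logprobs are wanted.
        return bool(cached and (not logprobs or cached.get("logprob")))

    assert is_cache_hit({"output": "..."}, logprobs=False)
    assert not is_cache_hit({"output": "..."}, logprobs=True)  # stale entry: refetch
    assert is_cache_hit({"output": "...", "logprob": -0.3}, logprobs=True)
    assert not is_cache_hit(None, logprobs=False)

Note that the check is on truthiness, so a cached logprob of exactly 0.0 (probability 1.0) would also re-trigger the API call; a rare but possible value.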
src/workflows/qb_agents.py CHANGED
@@ -1,14 +1,18 @@
 import time
 from typing import Any, Iterable, TypedDict
 
+from loguru import logger
+
 from .executors import WorkflowOutput, execute_workflow
 from .structs import TossupWorkflow, Workflow
 
 
-def _get_workflow_response(workflow: Workflow, available_vars: dict[str, Any]) -> tuple[WorkflowOutput, float]:
+def _get_workflow_response(
+    workflow: Workflow, available_vars: dict[str, Any], logprob_step: bool | str = False
+) -> tuple[WorkflowOutput, float]:
     """Get response from executing a complete workflow."""
     start_time = time.time()
-    workflow_output = execute_workflow(workflow, available_vars, return_full_content=True)
+    workflow_output = execute_workflow(workflow, available_vars, return_full_content=True, logprob_step=logprob_step)
     response_time = time.time() - start_time
     return workflow_output, response_time
 
@@ -78,15 +82,17 @@ class QuizBowlTossupAgent:
         """
         for i, question_text in enumerate(question_runs):
             # Execute the complete workflow
+            answer_var_step = self.workflow.outputs["answer"].split(".")[0]
             workflow_output, response_time = _get_workflow_response(
-                self.workflow, {self.external_input_variable: question_text}
+                self.workflow, {self.external_input_variable: question_text}, logprob_step=answer_var_step
             )
             final_outputs = workflow_output["final_outputs"]
-            buzz = self.workflow.buzzer.run(final_outputs["confidence"], logprob=final_outputs.get("logprob"))
+            buzz = self.workflow.buzzer.run(final_outputs["confidence"], logprob=workflow_output["logprob"])
             result: TossupResult = {
                 "position": i + 1,
                 "answer": final_outputs["answer"],
                 "confidence": final_outputs["confidence"],
+                "logprob": workflow_output["logprob"],
                 "buzz": buzz,
                 "question_fragment": question_text,
                 "step_contents": workflow_output["step_contents"],
src/workflows/structs.py CHANGED
@@ -6,6 +6,8 @@ from typing import Any, Literal, Optional
 import numpy as np
 from pydantic import BaseModel, Field, model_validator
 
+from .configs import AVAILABLE_MODELS
+
 """
 Core data structures for defining workflows and their components.
 
@@ -259,6 +261,13 @@ class Workflow(BaseModel):
         """Get all model selections for all steps."""
         return {step_id: step.get_full_model_name() for step_id, step in self.steps.items()}
 
+    def get_output_model_selections(self) -> dict[str, str]:
+        """Get all output model selections for all steps."""
+        return {
+            output_var: target_var.split(".")[0] if target_var else None
+            for output_var, target_var in self.outputs.items()
+        }
+
     # Step update method
 
     def add_step(self, step: ModelStep) -> "Workflow":
@@ -305,14 +314,19 @@ class Buzzer(BaseModel):
         use_enum_values = True
         frozen = True
 
+    def update(self, **kwargs) -> "Buzzer":
+        """Update the buzzer with the given kwargs."""
+        return self.model_copy(update=kwargs)
+
     def run(self, confidence: float, prob: float | None = None, logprob: float | None = None) -> bool:
         """Run the buzzer logic."""
         if logprob is not None and prob is not None:
             raise ValueError("Cannot provide both logprob and prob")
-        if logprob is not None:
-            prob = np.exp(logprob)
         if self.prob_threshold is None:
             return confidence >= self.confidence_threshold
+        if logprob is None and prob is None:
+            raise ValueError("Must provide either logprob or prob if prob_threshold is not None")
+        prob = prob or float(np.exp(logprob))
         if self.method == BuzzerMethod.AND:
             return confidence >= self.confidence_threshold and prob >= self.prob_threshold
         elif self.method == BuzzerMethod.OR:
@@ -333,6 +347,24 @@ class TossupWorkflow(Workflow):
 
     buzzer: Buzzer
 
+    def get_answer_model(self, answer_var: str | None = None) -> str | None:
+        answer_var = answer_var or self.outputs["answer"]
+        if answer_var is None:
+            return None
+        step_id = answer_var.split(".")[0]
+        return self.steps[step_id].get_full_model_name()
+
+    def is_token_probs_supported(self, answer_var: str | None = None) -> bool:
+        model_name = self.get_answer_model(answer_var)
+        if model_name is None:
+            return True
+        return AVAILABLE_MODELS[model_name].get("logprobs", False)
+
     def update_buzzer(self, buzzer: Buzzer) -> "TossupWorkflow":
         """Update the buzzer."""
         return self.model_copy(update={"buzzer": buzzer})
+
+    def refresh_buzzer(self) -> "TossupWorkflow":
+        if not self.is_token_probs_supported():
+            return self.update_buzzer(self.buzzer.update(prob_threshold=None, method="AND"))
+        return self
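
`Buzzer.run` now fails fast when a `prob_threshold` is set but neither `prob` nor `logprob` is supplied, and converts the logprob only when a probability gate actually exists; `refresh_buzzer` drops the probability gate entirely (AND on confidence alone) when the answer-producing model cannot return logprobs. A small numeric sketch of the thresholding, with assumed threshold values:

    import numpy as np

    confidence_threshold, prob_threshold = 0.8, 0.6  # assumed example values

    confidence, logprob = 0.9, -0.2
    prob = float(np.exp(logprob))  # exp(-0.2) ~= 0.819

    # BuzzerMethod.AND: both gates must pass.
    assert (confidence >= confidence_threshold) and (prob >= prob_threshold)

    # A weak logprob fails the prob gate even with high confidence.
    assert float(np.exp(-1.5)) < prob_threshold  # exp(-1.5) ~= 0.223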
src/workflows/utils.py CHANGED
@@ -168,7 +168,7 @@ def topological_sort(dependencies: dict[str, set[str]]) -> list[str]:
 
     nodes = list(dependencies.keys())
     dependents: dict[str, list[str]] = {node: [] for node in nodes}
-    in_degree: dict[str, int] = {node: 0 for node in nodes}
+    in_degree: dict[str, int] = dict.fromkeys(nodes, 0)
 
     # Calculate in-degrees and build dependents list
    for node, deps in dependencies.items():
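
The `dict.fromkeys` rewrite is behavior-preserving here because the shared default is an immutable `int`; the two forms only diverge for mutable defaults, where every key would share one object. A quick check:

    nodes = ["a", "b", "c"]
    assert dict.fromkeys(nodes, 0) == {node: 0 for node in nodes}

    # The classic pitfall, which does not apply to 0: a mutable default is shared.
    shared = dict.fromkeys(nodes, [])
    shared["a"].append(1)
    assert shared["b"] == [1]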