remove useless code
Browse files- OpenAIChatAtomicFlow.py +29 -107
OpenAIChatAtomicFlow.py
CHANGED
@@ -48,15 +48,15 @@ class OpenAIChatAtomicFlow(AtomicFlow):
|
|
48 |
response_annotators: Optional[Dict[str, MessageAnnotator]] = {}
|
49 |
|
50 |
default_search_space = {
|
51 |
-
"model": tune.choice(
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
),
|
60 |
"temperature_or_top_p": tune.choice(
|
61 |
[
|
62 |
{"temperature": tune.uniform(0, 2)},
|
@@ -133,7 +133,10 @@ class OpenAIChatAtomicFlow(AtomicFlow):
|
|
133 |
response_annotators = config.get("response_annotators", {})
|
134 |
if len(response_annotators) > 0:
|
135 |
for key, config in response_annotators.items():
|
136 |
-
|
|
|
|
|
|
|
137 |
return {"response_annotators": response_annotators}
|
138 |
|
139 |
@classmethod
|
@@ -361,6 +364,7 @@ class OpenAIChatAtomicFlow(AtomicFlow):
|
|
361 |
optimization_budget: Optional[float] = None,
|
362 |
num_samples: Optional[int] = 1,
|
363 |
logging_level: Optional[int] = logging.WARN, # TODO(yeeef)
|
|
|
364 |
**config,
|
365 |
) -> Tuple[Dict, Any]: # tune.ExperimentAnalysis
|
366 |
"""
|
@@ -396,6 +400,7 @@ class OpenAIChatAtomicFlow(AtomicFlow):
|
|
396 |
- tune.ExperimentAnalysis: The tuning results.
|
397 |
"""
|
398 |
|
|
|
399 |
space = cls.default_search_space.copy()
|
400 |
|
401 |
if config is not None:
|
@@ -413,100 +418,16 @@ class OpenAIChatAtomicFlow(AtomicFlow):
|
|
413 |
space["temperature"] = temperature
|
414 |
space["top_p"] = top_p
|
415 |
log.warning("temperature and top_p are not recommended to vary together.")
|
416 |
-
|
417 |
-
# TODO: shall we use cls method?
|
418 |
-
cls._max_valid_n_per_max_tokens, cls._min_invalid_n_per_max_tokens = {}, {}
|
419 |
-
cls.optimization_budget = optimization_budget
|
420 |
-
cls.inference_budget = inference_budget
|
421 |
-
cls._prune_hp = "best_of" if space.get("best_of", 1) != 1 else "n"
|
422 |
-
cls._prompts = space.get("prompt")
|
423 |
-
|
424 |
-
# if cls._prompts is None:
|
425 |
-
# cls._messages = space.get("messages")
|
426 |
-
# assert isinstance(cls._messages, list) and isinstance(
|
427 |
-
# cls._messages[0], (dict, list)
|
428 |
-
# ), "messages must be a list of dicts or a list of lists."
|
429 |
-
# if isinstance(cls._messages[0], dict):
|
430 |
-
# cls._messages = [cls._messages]
|
431 |
-
# space["messages"] = tune.choice(list(range(len(cls._messages))))
|
432 |
-
# else:
|
433 |
-
# assert space.get("messages") is None, "messages and prompt cannot be provided at the same time."
|
434 |
-
# assert isinstance(cls._prompts, (str, list)), "prompt must be a string or a list of strings."
|
435 |
-
# if isinstance(cls._prompts, str):
|
436 |
-
# cls._prompts = [cls._prompts]
|
437 |
-
# space["prompt"] = tune.choice(list(range(len(cls._prompts))))
|
438 |
-
# cls._stops = space.get("stop")
|
439 |
-
# if cls._stops:
|
440 |
-
# assert isinstance(
|
441 |
-
# cls._stops, (str, list)
|
442 |
-
# ), "stop must be a string, a list of strings, or a list of lists of strings."
|
443 |
-
# if not (isinstance(cls._stops, list) and isinstance(cls._stops[0], list)):
|
444 |
-
# cls._stops = [cls._stops]
|
445 |
-
# space["stop"] = tune.choice(list(range(len(cls._stops))))
|
446 |
-
|
447 |
-
# cls._config_list = space.get("config_list")
|
448 |
-
# if cls._config_list is not None:
|
449 |
-
# is_const = is_constant(cls._config_list)
|
450 |
-
# if is_const:
|
451 |
-
# space.pop("config_list")
|
452 |
-
# cls._metric, cls._mode = metric, mode
|
453 |
-
# cls._total_cost = 0 # total optimization cost
|
454 |
-
# cls._eval_func = eval_func
|
455 |
-
# cls.data = data
|
456 |
-
# cls.avg_input_tokens = None
|
457 |
-
|
458 |
-
space_model = space["model"]
|
459 |
|
460 |
-
|
461 |
-
|
462 |
-
|
463 |
-
|
464 |
-
|
465 |
-
|
466 |
-
|
467 |
-
|
468 |
-
|
469 |
-
if "n" in space:
|
470 |
-
subspace["n"] = space.pop("n")
|
471 |
-
choices = []
|
472 |
-
for model in space["model"]:
|
473 |
-
choices.append({"model": model, **subspace})
|
474 |
-
space["subspace"] = tune.choice(choices)
|
475 |
-
space.pop("model")
|
476 |
-
# start all the models with the same hp config
|
477 |
-
search_alg = BlendSearch(
|
478 |
-
cost_attr="cost",
|
479 |
-
cost_budget=optimization_budget,
|
480 |
-
metric=metric,
|
481 |
-
mode=mode,
|
482 |
-
space=space,
|
483 |
-
)
|
484 |
-
config0 = search_alg.suggest("t0")
|
485 |
-
points_to_evaluate = [config0]
|
486 |
-
for model in space_model:
|
487 |
-
if model != config0["subspace"]["model"]:
|
488 |
-
point = config0.copy()
|
489 |
-
point["subspace"] = point["subspace"].copy()
|
490 |
-
point["subspace"]["model"] = model
|
491 |
-
points_to_evaluate.append(point)
|
492 |
-
search_alg = BlendSearch(
|
493 |
-
cost_attr="cost",
|
494 |
-
cost_budget=optimization_budget,
|
495 |
-
metric=metric,
|
496 |
-
mode=mode,
|
497 |
-
space=space,
|
498 |
-
points_to_evaluate=points_to_evaluate,
|
499 |
-
)
|
500 |
-
else:
|
501 |
-
# TODO: currently we always falls in this branch
|
502 |
-
search_alg = BlendSearch(
|
503 |
-
cost_attr="cost",
|
504 |
-
cost_budget=optimization_budget,
|
505 |
-
metric=metric,
|
506 |
-
mode=mode,
|
507 |
-
space=space,
|
508 |
-
)
|
509 |
-
|
510 |
# Args:
|
511 |
# evaluation_function: A user-defined evaluation function.
|
512 |
# It takes a configuration as input, outputs an evaluation
|
@@ -527,7 +448,7 @@ class OpenAIChatAtomicFlow(AtomicFlow):
|
|
527 |
if temperature_or_top_p is not None:
|
528 |
search_config.update(temperature_or_top_p)
|
529 |
|
530 |
-
flow_config["model_name"] = search_config
|
531 |
generation_parameters = flow_config["generation_parameters"]
|
532 |
for generation_parameter in generation_parameters:
|
533 |
if generation_parameter == "model_kwargs":
|
@@ -558,11 +479,12 @@ class OpenAIChatAtomicFlow(AtomicFlow):
|
|
558 |
log.info(f"Tunning with config: {search_config}")
|
559 |
# TODO: the code currently only works when there is no subspace, i.e. there is only one model to tune with
|
560 |
# align search_config with flow_config
|
561 |
-
updated_flow_config = updated_flow_config_with_search_config(flow_config=
|
562 |
log.info(f"Updated flow_config: {updated_flow_config}")
|
563 |
# flow_launcher = FlowAPILauncher(flow, 1, False, 3, 0, ["code"]) TODO: maybe refactor with flow_launcher
|
564 |
|
565 |
-
# TODO: limitations: langchain api call does not give us the cost of the api call
|
|
|
566 |
final_metrics = {}
|
567 |
for sample in tune_dps:
|
568 |
sample["api_key"] = api_key
|
@@ -593,6 +515,6 @@ class OpenAIChatAtomicFlow(AtomicFlow):
|
|
593 |
verbose=3,
|
594 |
)
|
595 |
best_search_config = analysis.best_config
|
596 |
-
flow_config = updated_flow_config_with_search_config(
|
597 |
log.info(f"best search config found: {best_search_config}, analysis: {analysis.best_result}")
|
598 |
return flow_config, analysis
|
|
|
48 |
response_annotators: Optional[Dict[str, MessageAnnotator]] = {}
|
49 |
|
50 |
default_search_space = {
|
51 |
+
# "model": tune.choice(
|
52 |
+
# [
|
53 |
+
# # "text-ada-001",
|
54 |
+
# # "text-babbage-001",
|
55 |
+
# # "text-davinci-003",
|
56 |
+
# "gpt-3.5-turbo",
|
57 |
+
# # "gpt-4",
|
58 |
+
# ]
|
59 |
+
# ),
|
60 |
"temperature_or_top_p": tune.choice(
|
61 |
[
|
62 |
{"temperature": tune.uniform(0, 2)},
|
|
|
133 |
response_annotators = config.get("response_annotators", {})
|
134 |
if len(response_annotators) > 0:
|
135 |
for key, config in response_annotators.items():
|
136 |
+
if isinstance(config, MessageAnnotator):
|
137 |
+
response_annotators[key] = config
|
138 |
+
else:
|
139 |
+
response_annotators[key] = hydra.utils.instantiate(config, _convert_="partial")
|
140 |
return {"response_annotators": response_annotators}
|
141 |
|
142 |
@classmethod
|
|
|
364 |
optimization_budget: Optional[float] = None,
|
365 |
num_samples: Optional[int] = 1,
|
366 |
logging_level: Optional[int] = logging.WARN, # TODO(yeeef)
|
367 |
+
initial_flow_config: Optional[Dict] = None, # if not supplied will use default flow config of the class (xxx.yaml)
|
368 |
**config,
|
369 |
) -> Tuple[Dict, Any]: # tune.ExperimentAnalysis
|
370 |
"""
|
|
|
400 |
- tune.ExperimentAnalysis: The tuning results.
|
401 |
"""
|
402 |
|
403 |
+
initial_flow_config = initial_flow_config or cls.get_config()
|
404 |
space = cls.default_search_space.copy()
|
405 |
|
406 |
if config is not None:
|
|
|
418 |
space["temperature"] = temperature
|
419 |
space["top_p"] = top_p
|
420 |
log.warning("temperature and top_p are not recommended to vary together.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
421 |
|
422 |
+
# Note: currently we fix the model rather than make it tunable
|
423 |
+
search_alg = BlendSearch(
|
424 |
+
cost_attr="cost",
|
425 |
+
cost_budget=optimization_budget,
|
426 |
+
metric=metric,
|
427 |
+
mode=mode,
|
428 |
+
space=space,
|
429 |
+
)
|
430 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
431 |
# Args:
|
432 |
# evaluation_function: A user-defined evaluation function.
|
433 |
# It takes a configuration as input, outputs an evaluation
|
|
|
448 |
if temperature_or_top_p is not None:
|
449 |
search_config.update(temperature_or_top_p)
|
450 |
|
451 |
+
flow_config["model_name"] = search_config.get("model", flow_config["model_name"])
|
452 |
generation_parameters = flow_config["generation_parameters"]
|
453 |
for generation_parameter in generation_parameters:
|
454 |
if generation_parameter == "model_kwargs":
|
|
|
479 |
log.info(f"Tunning with config: {search_config}")
|
480 |
# TODO: the code currently only works when there is no subspace, i.e. there is only one model to tune with
|
481 |
# align search_config with flow_config
|
482 |
+
updated_flow_config = updated_flow_config_with_search_config(flow_config=initial_flow_config, search_config=search_config)
|
483 |
log.info(f"Updated flow_config: {updated_flow_config}")
|
484 |
# flow_launcher = FlowAPILauncher(flow, 1, False, 3, 0, ["code"]) TODO: maybe refactor with flow_launcher
|
485 |
|
486 |
+
# TODO: limitations: langchain api call does not give us the cost of the api call, and only gives us
|
487 |
+
# one result no matter the n
|
488 |
final_metrics = {}
|
489 |
for sample in tune_dps:
|
490 |
sample["api_key"] = api_key
|
|
|
515 |
verbose=3,
|
516 |
)
|
517 |
best_search_config = analysis.best_config
|
518 |
+
flow_config = updated_flow_config_with_search_config(initial_flow_config, best_search_config)
|
519 |
log.info(f"best search config found: {best_search_config}, analysis: {analysis.best_result}")
|
520 |
return flow_config, analysis
|