"""
This module implements a hyperparameter optimization routine for the embedding application. It utilizes TPE optimization from Optuna.

Each run, the optimizer will set the default values inside the hyperparameters. At the end, it will output the best ones it has found.
"""

import hashlib
import json
import logging
import re
from pathlib import Path

import gradio as gr
import numpy as np
import optuna

logging.getLogger('optuna').setLevel(logging.WARNING)

import extensions.superboogav2.parameters as parameters
from modules.logging_colors import logger

from .benchmark import benchmark
from .parameters import Parameters


def _markdown_hyperparams():
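    """Render the current hyperparameter defaults as a Markdown bullet list, escaping Markdown special characters."""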
    res = []
    for param_name, param_value in Parameters.getInstance().hyperparameters.items():
        param_name = re.sub(r"([_*\[\]()~`>#+-.!])", r"\\\1", param_name)
        param_value_default = re.sub(r"([_*\[\]()~`>#+-.!])", r"\\\1", str(param_value['default'])) if param_value['default'] else ' '

        res.append('* {}: **{}**'.format(param_name, param_value_default))

    return '\n'.join(res)


def _convert_np_types(params):
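    """Convert NumPy scalar types to native Python types so the parameters can be JSON-serialized."""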
    for key in params:
        if type(params[key]) == np.bool_:
            params[key] = bool(params[key])
        elif type(params[key]) == np.int64:
            params[key] = int(params[key])
        elif type(params[key]) == np.float64:
            params[key] = float(params[key])
    return params


def _set_hyperparameters(params):
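    """Write the given values back as the new defaults of the shared Parameters singleton."""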
    for param_name, param_value in params.items():
        if param_name in Parameters.getInstance().hyperparameters:
            Parameters.getInstance().hyperparameters[param_name]['default'] = param_value


def _is_optimization_param(val):
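    """Return True if this hyperparameter is flagged to be included in the search."""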
    is_opt = val.get('should_optimize', False)
    return is_opt


def _get_params_hash(params):
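    """Hash a parameter combination so repeated trials can reuse cached benchmark scores."""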
    params_str = json.dumps(params, sort_keys=True)
    return hashlib.sha256(params_str.encode()).hexdigest()


def optimize(collector, progress=gr.Progress()):
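    """Run TPE optimization over the flagged hyperparameters, report progress in the UI, and return a Markdown summary of the best combination found."""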
    progress(0, desc='Setting Up...')

    current_step = 0
    best_score = 0
    scores_cache = {}  # benchmark scores keyed by a hash of the parameter combination

    def objective_function(trial):
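        """Benchmark one sampled parameter combination and return its negated score (Optuna minimizes)."""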
|
nonlocal current_step |
|
nonlocal best_score |
|
nonlocal scores_cache |
|
|
|
params = {} |
|
for key, val in Parameters.getInstance().hyperparameters.items(): |
|
if _is_optimization_param(val): |
|
params[key] = trial.suggest_categorical(key, val['categories']) |

        _set_hyperparameters(params)

        params_hash = _get_params_hash(params)

        # Skip the benchmark if this exact combination has already been scored.
        if params_hash in scores_cache:
            return scores_cache[params_hash]

        score, max_score = benchmark(Path("extensions/superboogav2/benchmark_texts/questions.json"), collector)

        scores_cache[params_hash] = score

        result = json.dumps(_convert_np_types(params), indent=4)
        result += f'\nScore: {score}/{max_score}'

        logger.debug(result)

        current_step += 1
        best_score = max(best_score, score)
        progress(current_step / parameters.get_optimization_steps(), desc=f'Optimizing... {current_step}/{parameters.get_optimization_steps()}')

        # Optuna minimizes the objective by default, so return the negated score.
        return -score

    # optuna.create_study() uses the TPE sampler by default, as described in the module docstring.
    study = optuna.create_study()
    study.optimize(objective_function, n_trials=int(parameters.get_optimization_steps()))

    best_params = study.best_params
    _set_hyperparameters(best_params)

    # Build a Markdown summary of the best parameters and the best score for the UI.
    str_result = f"## Best parameters:\n\n{_markdown_hyperparams()}\n\n## Score:\n\n{best_score}"

    # Also persist the best parameters to disk.
    with open('best_params.json', 'w') as fp:
        json.dump(_convert_np_types(best_params), fp, indent=4)

    return str_result