# data/model_handler.py
import json
import os
import shutil

import pandas as pd
from git import Repo

from .dataset_handler import VIDORE_V1_MTEB_NAMES, VIDORE_V2_MTEB_NAMES, get_datasets_nickname

class ModelHandler:
    def __init__(self):
        self.model_infos = {}

    @staticmethod
    def get_folders(dir_path):
        # List the sub-directories of `dir_path`, sorted alphabetically.
        return sorted([
            path_
            for path_ in os.listdir(dir_path)
            if os.path.isdir(os.path.join(dir_path, path_))
        ])

    def get_vidore_data(self, metric="ndcg_at_5"):
        # Clone (or update) the MTEB results repository, then load each model's
        # metadata and ViDoRe benchmark scores into `self.model_infos`.
        repo_url = "https://github.com/embeddings-benchmark/results.git"
        local_path = "./results"
        folder_of_interest = "results"

        if os.path.exists(local_path):
            # Re-use the existing local clone and fetch the latest results.
            repo = Repo(local_path)
            origin = repo.remotes.origin
            origin.pull()
        else:
            # Shallow clone to avoid downloading the full history.
            Repo.clone_from(repo_url, local_path, depth=1)

        model_names = self.get_folders(os.path.join(local_path, folder_of_interest))
        for model_name in model_names:
            # Results are stored per model revision; use the first (alphabetically sorted) revision folder.
            revisions = self.get_folders(os.path.join(local_path, folder_of_interest, model_name))
            first_revision = revisions[0]
            result_filenames = [
                result_filename
                for result_filename in os.listdir(
                    os.path.join(local_path, folder_of_interest, model_name, first_revision)
                )
                # if result_filename.endswith(".json") and result_filename != "model_meta.json"
            ]

            if "model_meta.json" in result_filenames:
                with open(
                    os.path.join(local_path, folder_of_interest, model_name, first_revision, "model_meta.json"), "r"
                ) as f:
                    meta = json.load(f)
            else:
                meta = {}

            # A benchmark's scores are loaded only if the model has a result file
            # for every dataset in that benchmark.
            results = {}
            if all(f"{v1_dataset_name}.json" in result_filenames for v1_dataset_name in VIDORE_V1_MTEB_NAMES):
                for v1_dataset_name in VIDORE_V1_MTEB_NAMES:
                    with open(
                        os.path.join(local_path, folder_of_interest, model_name, first_revision, f"{v1_dataset_name}.json"), "r"
                    ) as f:
                        results[v1_dataset_name] = json.load(f)
            if all(f"{v2_dataset_name}.json" in result_filenames for v2_dataset_name in VIDORE_V2_MTEB_NAMES):
                for v2_dataset_name in VIDORE_V2_MTEB_NAMES:
                    with open(
                        os.path.join(local_path, folder_of_interest, model_name, first_revision, f"{v2_dataset_name}.json"), "r"
                    ) as f:
                        results[v2_dataset_name] = json.load(f)

            self.model_infos[model_name] = {"meta": meta, "results": results}

    def filter_models_by_benchmark(self, benchmark_version=1):
        # Keep only the models that report results for at least one dataset
        # of the requested ViDoRe benchmark version (1 or 2).
        filtered_model_infos = {}
        keywords = VIDORE_V1_MTEB_NAMES if benchmark_version == 1 else VIDORE_V2_MTEB_NAMES
        for model, info in self.model_infos.items():
            results = info["results"]
            if any(any(keyword in dataset for keyword in keywords) for dataset in results.keys()):
                filtered_model_infos[model] = info
        return filtered_model_infos

    def render_df(self, metric="ndcg_at_5", benchmark_version=1):
        # Build a leaderboard DataFrame with one row per model and one column per
        # dataset nickname, plus the model size in millions of parameters.
        filtered_model_infos = self.filter_models_by_benchmark(benchmark_version)
        if len(filtered_model_infos) == 0:
            return pd.DataFrame()

        keywords = VIDORE_V1_MTEB_NAMES if benchmark_version == 1 else VIDORE_V2_MTEB_NAMES
        model_res = {}
        for model in filtered_model_infos.keys():
            res = filtered_model_infos[model]["results"]
            dataset_res = {}
            # Guard against a missing or null "n_parameters" field in the metadata.
            n_parameters = filtered_model_infos[model]["meta"].get("n_parameters")
            dataset_res["Model Size (Million Parameters)"] = (
                n_parameters // 1_000_000 if n_parameters is not None else None
            )
            for dataset in res.keys():
                if not any(keyword in dataset for keyword in keywords):
                    continue
                dataset_nickname = get_datasets_nickname(dataset)
                dataset_res[dataset_nickname] = res[dataset]["scores"]["test"][0][metric]
            model_res[model] = dataset_res

        return pd.DataFrame(model_res).T
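

# Minimal usage sketch (an illustrative addition, not part of the original leaderboard app).
# It assumes the module is run as part of the package, e.g. `python -m data.model_handler`
# from the repository root (so the relative `.dataset_handler` import resolves), and that
# network access to the public MTEB results repository is available.
if __name__ == "__main__":
    handler = ModelHandler()
    handler.get_vidore_data(metric="ndcg_at_5")

    # ViDoRe v1 leaderboard table: one row per model, one column per dataset nickname.
    df_v1 = handler.render_df(metric="ndcg_at_5", benchmark_version=1)
    print(df_v1.head())

    # ViDoRe v2 leaderboard table.
    df_v2 = handler.render_df(metric="ndcg_at_5", benchmark_version=2)
    print(df_v2.head())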