Commit cd73296
Parent(s): 6697d3c

fix bug: skip duplicate results

Files changed:
- src/envs.py (+7 -0)
- src/loaders.py (+7 -2)
src/envs.py CHANGED
@@ -65,3 +65,10 @@ METRIC_LIST = [
     "mrr_at_100",
     "mrr_at_1000",
 ]
+
+# Submissions that need to be skipped when loading results
+SKIP_SUBMISSIONS = [
+    # 2025/06/13 - these two submissions were submitted by developers to test the submission system
+    "AIR-Bench_24.05/bge-m3/NoReranker/results.json",
+    "AIR-Bench_24.05/bge-m3/NoReranker/results_20241201071239-70de025455132e3b6d6a7786130cf91f.json"
+]
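
The new SKIP_SUBMISSIONS constant is consumed by the loader below, where each entry is matched as a substring of the walked file path. A minimal sketch of that matching behavior follows; the results directory "/data/results" and the example path are assumptions for illustration, not taken from the repo:

# Sketch: a SKIP_SUBMISSIONS entry matches anywhere inside the full path,
# so listing the benchmark/model/reranker prefix pins down one exact file.
SKIP_SUBMISSIONS = [
    "AIR-Bench_24.05/bge-m3/NoReranker/results.json",
]

filepath = "/data/results/AIR-Bench_24.05/bge-m3/NoReranker/results.json"
print(any(skip in filepath for skip in SKIP_SUBMISSIONS))  # True -> skipped

Note the design choice: because the check is a plain substring match, a bare entry like "results.json" would accidentally catch every results file, so each entry carries the full relative path of the specific submission to drop.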
src/loaders.py CHANGED
@@ -5,7 +5,7 @@ from typing import Dict, List, Union
 import pandas as pd
 
 from src.columns import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP
-from src.envs import BENCHMARK_VERSION_LIST, DEFAULT_METRIC_LONG_DOC, DEFAULT_METRIC_QA
+from src.envs import BENCHMARK_VERSION_LIST, DEFAULT_METRIC_LONG_DOC, DEFAULT_METRIC_QA, SKIP_SUBMISSIONS
 from src.models import FullEvalResult, LeaderboardDataStore, TaskType, get_safe_name
 from src.utils import get_default_cols, get_leaderboard_df, reset_rank
 
@@ -26,7 +26,12 @@ def load_raw_eval_results(results_path: Union[Path, str]) -> List[FullEvalResult
             if not (file.startswith("results") and file.endswith(".json")):
                 print(f"skip {file}")
                 continue
-            model_result_filepaths.append(os.path.join(root, file))
+            filepath = os.path.join(root, file)
+            # skip the submissions that are in the skip list
+            if any(skip in filepath for skip in SKIP_SUBMISSIONS):
+                print(f"skip {filepath}")
+                continue
+            model_result_filepaths.append(filepath)
 
     eval_results = {}
     for model_result_filepath in model_result_filepaths:
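
Taken together, the two hunks amount to the filtering loop sketched below. This is a self-contained rendition for illustration only: the os.walk structure is inferred from the diff's context lines, and the helper name collect_result_filepaths is hypothetical, not part of the commit.

import os

# Skip list as defined in src/envs.py by this commit
SKIP_SUBMISSIONS = [
    "AIR-Bench_24.05/bge-m3/NoReranker/results.json",
    "AIR-Bench_24.05/bge-m3/NoReranker/results_20241201071239-70de025455132e3b6d6a7786130cf91f.json"
]

def collect_result_filepaths(results_path: str) -> list:
    """Hypothetical helper mirroring the commit's filtering logic."""
    model_result_filepaths = []
    for root, _dirs, files in os.walk(results_path):
        for file in files:
            # keep only files named results*.json
            if not (file.startswith("results") and file.endswith(".json")):
                print(f"skip {file}")
                continue
            filepath = os.path.join(root, file)
            # new in this commit: drop any path that hits the skip list,
            # so duplicate/test submissions never reach eval_results
            if any(skip in filepath for skip in SKIP_SUBMISSIONS):
                print(f"skip {filepath}")
                continue
            model_result_filepaths.append(filepath)
    return model_result_filepaths

Filtering at collection time, before the results are parsed, means the duplicate developer submissions never enter eval_results, which is what the commit message ("skip duplicate results") is fixing.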