leaderboard

Running on CPU Upgrade

hanhainebula committed 12 days ago

Commit

cd73296

1 Parent(s): 6697d3c

fix bug: skip duplicate results

Files changed (2) hide show

src/envs.py CHANGED Viewed

@@ -65,3 +65,10 @@ METRIC_LIST = [
     "mrr_at_100",
     "mrr_at_1000",
 ]

     "mrr_at_100",
     "mrr_at_1000",
 ]
+# result files of submissions that must be skipped when loading evaluation results
+SKIP_SUBMISSIONS = [
+    # 2025/06/13 - these two submissions were submitted by developers to test the submission system
+    "AIR-Bench_24.05/bge-m3/NoReranker/results.json",
+    "AIR-Bench_24.05/bge-m3/NoReranker/results_20241201071239-70de025455132e3b6d6a7786130cf91f.json"
+]

src/loaders.py CHANGED Viewed

@@ -5,7 +5,7 @@ from typing import Dict, List, Union
 import pandas as pd
 from src.columns import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP
-from src.envs import BENCHMARK_VERSION_LIST, DEFAULT_METRIC_LONG_DOC, DEFAULT_METRIC_QA
 from src.models import FullEvalResult, LeaderboardDataStore, TaskType, get_safe_name
 from src.utils import get_default_cols, get_leaderboard_df, reset_rank
@@ -26,7 +26,12 @@ def load_raw_eval_results(results_path: Union[Path, str]) -> List[FullEvalResult
             if not (file.startswith("results") and file.endswith(".json")):
                 print(f"skip {file}")
                 continue
-            model_result_filepaths.append(os.path.join(root, file))
     eval_results = {}
     for model_result_filepath in model_result_filepaths:

 import pandas as pd
 from src.columns import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP
+from src.envs import BENCHMARK_VERSION_LIST, DEFAULT_METRIC_LONG_DOC, DEFAULT_METRIC_QA, SKIP_SUBMISSIONS
 from src.models import FullEvalResult, LeaderboardDataStore, TaskType, get_safe_name
 from src.utils import get_default_cols, get_leaderboard_df, reset_rank
             if not (file.startswith("results") and file.endswith(".json")):
                 print(f"skip {file}")
                 continue
+            filepath = os.path.join(root, file)
+            # skip the submissions that are in the skip list
+            if any(skip in filepath for skip in SKIP_SUBMISSIONS):
+                print(f"skip {filepath}")
+                continue
+            model_result_filepaths.append(filepath)
     eval_results = {}
     for model_result_filepath in model_result_filepaths: