Spaces:
AIR-Bench
/
Running on CPU Upgrade

hanhainebula committed on
Commit
cd73296
·
1 Parent(s): 6697d3c

fix bug: skip duplicate results

Browse files
Files changed (2) hide show
  1. src/envs.py +7 -0
  2. src/loaders.py +7 -2
src/envs.py CHANGED
@@ -65,3 +65,10 @@ METRIC_LIST = [
65
  "mrr_at_100",
66
  "mrr_at_1000",
67
  ]
 
 
 
 
 
 
 
 
65
  "mrr_at_100",
66
  "mrr_at_1000",
67
  ]
68
+
69
+ # submissions need to be skipped for some reason
70
+ SKIP_SUBMISSIONS = [
71
+ # 2025/06/13 - the two submissions are submitted by developers to test the submission system
72
+ "AIR-Bench_24.05/bge-m3/NoReranker/results.json",
73
+ "AIR-Bench_24.05/bge-m3/NoReranker/results_20241201071239-70de025455132e3b6d6a7786130cf91f.json"
74
+ ]
src/loaders.py CHANGED
@@ -5,7 +5,7 @@ from typing import Dict, List, Union
5
  import pandas as pd
6
 
7
  from src.columns import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP
8
- from src.envs import BENCHMARK_VERSION_LIST, DEFAULT_METRIC_LONG_DOC, DEFAULT_METRIC_QA
9
  from src.models import FullEvalResult, LeaderboardDataStore, TaskType, get_safe_name
10
  from src.utils import get_default_cols, get_leaderboard_df, reset_rank
11
 
@@ -26,7 +26,12 @@ def load_raw_eval_results(results_path: Union[Path, str]) -> List[FullEvalResult
26
  if not (file.startswith("results") and file.endswith(".json")):
27
  print(f"skip {file}")
28
  continue
29
- model_result_filepaths.append(os.path.join(root, file))
 
 
 
 
 
30
 
31
  eval_results = {}
32
  for model_result_filepath in model_result_filepaths:
 
5
  import pandas as pd
6
 
7
  from src.columns import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP
8
+ from src.envs import BENCHMARK_VERSION_LIST, DEFAULT_METRIC_LONG_DOC, DEFAULT_METRIC_QA, SKIP_SUBMISSIONS
9
  from src.models import FullEvalResult, LeaderboardDataStore, TaskType, get_safe_name
10
  from src.utils import get_default_cols, get_leaderboard_df, reset_rank
11
 
 
26
  if not (file.startswith("results") and file.endswith(".json")):
27
  print(f"skip {file}")
28
  continue
29
+ filepath = os.path.join(root, file)
30
+ # skip the submissions that are in the skip list
31
+ if any(skip in filepath for skip in SKIP_SUBMISSIONS):
32
+ print(f"skip {filepath}")
33
+ continue
34
+ model_result_filepaths.append(filepath)
35
 
36
  eval_results = {}
37
  for model_result_filepath in model_result_filepaths: