Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
08fea1e
·
1 Parent(s): d27648d

fix: fix the mean calculation for NAN values

Browse files
Files changed (2) hide show
  1. src/read_evals.py +8 -2
  2. src/utils.py +2 -1
src/read_evals.py CHANGED
@@ -7,7 +7,6 @@ from typing import List
7
  import pandas as pd
8
 
9
  from src.benchmarks import get_safe_name
10
- from src.display.formatting import has_no_nan_values
11
  from src.display.utils import (
12
  COL_NAME_RERANKING_MODEL,
13
  COL_NAME_RETRIEVAL_MODEL,
@@ -27,6 +26,13 @@ from src.display.utils import (
27
  from src.display.formatting import make_clickable_model
28
 
29
 
 
 
 
 
 
 
 
30
  @dataclass
31
  class EvalResult:
32
  """
@@ -203,7 +209,7 @@ def get_leaderboard_df(raw_data: List[FullEvalResult], task: str, metric: str) -
203
  _benchmark_cols = frozenset(benchmark_cols).intersection(frozenset(df.columns.to_list()))
204
 
205
  # calculate the average score for selected benchmarks
206
- df[COL_NAME_AVG] = df[list(_benchmark_cols)].mean(axis=1).round(decimals=2)
207
  df.sort_values(by=[COL_NAME_AVG], ascending=False, inplace=True)
208
  df.reset_index(inplace=True, drop=True)
209
 
 
7
  import pandas as pd
8
 
9
  from src.benchmarks import get_safe_name
 
10
  from src.display.utils import (
11
  COL_NAME_RERANKING_MODEL,
12
  COL_NAME_RETRIEVAL_MODEL,
 
26
  from src.display.formatting import make_clickable_model
27
 
28
 
29
+ def calculate_mean(row):
30
+ if pd.isna(row).any():
31
+ return 0
32
+ else:
33
+ return row.mean()
34
+
35
+
36
  @dataclass
37
  class EvalResult:
38
  """
 
209
  _benchmark_cols = frozenset(benchmark_cols).intersection(frozenset(df.columns.to_list()))
210
 
211
  # calculate the average score for selected benchmarks
212
+ df[COL_NAME_AVG] = df[list(_benchmark_cols)].apply(calculate_mean, axis=1).round(decimals=2)
213
  df.sort_values(by=[COL_NAME_AVG], ascending=False, inplace=True)
214
  df.reset_index(inplace=True, drop=True)
215
 
src/utils.py CHANGED
@@ -11,7 +11,7 @@ from src.display.formatting import styled_message, styled_error
11
  from src.display.utils import COLS_QA, TYPES_QA, COLS_LONG_DOC, TYPES_LONG_DOC, COL_NAME_RANK, COL_NAME_AVG, \
12
  COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL, COL_NAME_IS_ANONYMOUS, get_default_auto_eval_column_dict
13
  from src.envs import API, SEARCH_RESULTS_REPO
14
- from src.read_evals import FullEvalResult, get_leaderboard_df
15
 
16
 
17
  def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
@@ -100,6 +100,7 @@ def select_columns(df: pd.DataFrame, domain_query: list, language_query: list, t
100
  # We use COLS to maintain sorting
101
  filtered_df = df[FIXED_COLS + selected_cols]
102
  filtered_df[COL_NAME_AVG] = filtered_df[selected_cols].mean(axis=1, numeric_only=True).round(decimals=2)
 
103
  filtered_df.sort_values(by=[COL_NAME_AVG], ascending=False, inplace=True)
104
  filtered_df.reset_index(inplace=True, drop=True)
105
  filtered_df[COL_NAME_RANK] = filtered_df[COL_NAME_AVG].rank(ascending=False, method="min")
 
11
  from src.display.utils import COLS_QA, TYPES_QA, COLS_LONG_DOC, TYPES_LONG_DOC, COL_NAME_RANK, COL_NAME_AVG, \
12
  COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL, COL_NAME_IS_ANONYMOUS, get_default_auto_eval_column_dict
13
  from src.envs import API, SEARCH_RESULTS_REPO
14
+ from src.read_evals import FullEvalResult, get_leaderboard_df, calculate_mean
15
 
16
 
17
  def filter_models(df: pd.DataFrame, reranking_query: list) -> pd.DataFrame:
 
100
  # We use COLS to maintain sorting
101
  filtered_df = df[FIXED_COLS + selected_cols]
102
  filtered_df[COL_NAME_AVG] = filtered_df[selected_cols].mean(axis=1, numeric_only=True).round(decimals=2)
103
+ filtered_df[COL_NAME_AVG] = filtered_df[selected_cols].apply(calculate_mean, axis=1).round(decimals=2)
104
  filtered_df.sort_values(by=[COL_NAME_AVG], ascending=False, inplace=True)
105
  filtered_df.reset_index(inplace=True, drop=True)
106
  filtered_df[COL_NAME_RANK] = filtered_df[COL_NAME_AVG].rank(ascending=False, method="min")