Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
0531c42
1 Parent(s): 7cbfcef

fix: fix the ranking resetting issue

Browse files
Files changed (2) hide show
  1. app.py +60 -12
  2. src/utils.py +31 -13
app.py CHANGED
@@ -21,14 +21,14 @@ def restart_space():
21
  API.restart_space(repo_id=REPO_ID)
22
 
23
 
24
- try:
25
- snapshot_download(
26
- repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
27
- token=TOKEN
28
- )
29
- except Exception as e:
30
- print(f'failed to download')
31
- restart_space()
32
 
33
  raw_data = get_raw_eval_results(f"{EVAL_RESULTS_PATH}/AIR-Bench_24.04")
34
 
@@ -74,6 +74,28 @@ def update_metric_long_doc(
74
  return update_metric(raw_data, "long-doc", metric, domains, langs, reranking_model, query, show_anonymous)
75
 
76
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  demo = gr.Blocks(css=custom_css)
78
  with demo:
79
  gr.HTML(TITLE)
@@ -153,7 +175,20 @@ with demo:
153
 
154
  # Set search_bar listener
155
  search_bar.submit(
156
- update_table,
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  [
158
  hidden_leaderboard_table_for_search,
159
  selected_domains,
@@ -167,7 +202,7 @@ with demo:
167
 
168
  # Set column-wise listener
169
  for selector in [
170
- selected_domains, selected_langs, selected_rerankings, show_anonymous
171
  ]:
172
  selector.change(
173
  update_table,
@@ -271,7 +306,20 @@ with demo:
271
 
272
  # Set search_bar listener
273
  search_bar.submit(
274
- update_table_long_doc,
 
 
 
 
 
 
 
 
 
 
 
 
 
275
  [
276
  hidden_leaderboard_table_for_search,
277
  selected_domains,
@@ -285,7 +333,7 @@ with demo:
285
 
286
  # Set column-wise listener
287
  for selector in [
288
- selected_domains, selected_langs, selected_rerankings, show_anonymous
289
  ]:
290
  selector.change(
291
  update_table_long_doc,
 
21
  API.restart_space(repo_id=REPO_ID)
22
 
23
 
24
+ # try:
25
+ # snapshot_download(
26
+ # repo_id=RESULTS_REPO, local_dir=EVAL_RESULTS_PATH, repo_type="dataset", tqdm_class=None, etag_timeout=30,
27
+ # token=TOKEN
28
+ # )
29
+ # except Exception as e:
30
+ # print(f'failed to download')
31
+ # restart_space()
32
 
33
  raw_data = get_raw_eval_results(f"{EVAL_RESULTS_PATH}/AIR-Bench_24.04")
34
 
 
74
  return update_metric(raw_data, "long-doc", metric, domains, langs, reranking_model, query, show_anonymous)
75
 
76
 
77
+ def update_table_without_ranking(
78
+ hidden_df,
79
+ domains,
80
+ langs,
81
+ reranking_query,
82
+ query,
83
+ show_anonymous
84
+ ):
85
+ return update_table(hidden_df, domains, langs, reranking_query, query, show_anonymous, reset_ranking=False)
86
+
87
+
88
+ def update_table_without_ranking_long_doc(
89
+ hidden_df,
90
+ domains,
91
+ langs,
92
+ reranking_query,
93
+ query,
94
+ show_anonymous
95
+ ):
96
+ return update_table_long_doc(hidden_df, domains, langs, reranking_query, query, show_anonymous, reset_ranking=False)
97
+
98
+
99
  demo = gr.Blocks(css=custom_css)
100
  with demo:
101
  gr.HTML(TITLE)
 
175
 
176
  # Set search_bar listener
177
  search_bar.submit(
178
+ update_table_without_ranking,
179
+ [
180
+ hidden_leaderboard_table_for_search,
181
+ selected_domains,
182
+ selected_langs,
183
+ selected_rerankings,
184
+ search_bar,
185
+ show_anonymous,
186
+ ],
187
+ leaderboard_table,
188
+ )
189
+
190
+ selected_rerankings.change(
191
+ update_table_without_ranking,
192
  [
193
  hidden_leaderboard_table_for_search,
194
  selected_domains,
 
202
 
203
  # Set column-wise listener
204
  for selector in [
205
+ selected_domains, selected_langs, show_anonymous
206
  ]:
207
  selector.change(
208
  update_table,
 
306
 
307
  # Set search_bar listener
308
  search_bar.submit(
309
+ update_table_without_ranking_long_doc,
310
+ [
311
+ hidden_leaderboard_table_for_search,
312
+ selected_domains,
313
+ selected_langs,
314
+ selected_rerankings,
315
+ search_bar,
316
+ show_anonymous,
317
+ ],
318
+ leaderboard_table_long_doc,
319
+ )
320
+
321
+ selected_rerankings.change(
322
+ update_table_without_ranking_long_doc,
323
  [
324
  hidden_leaderboard_table_for_search,
325
  selected_domains,
 
333
 
334
  # Set column-wise listener
335
  for selector in [
336
+ selected_domains, selected_langs, show_anonymous
337
  ]:
338
  selector.change(
339
  update_table_long_doc,
src/utils.py CHANGED
@@ -92,7 +92,13 @@ FIXED_COLS = [c.name for _, _, c in fixed_cols]
92
  FIXED_COLS_TYPES = [c.type for _, _, c in fixed_cols]
93
 
94
 
95
- def select_columns(df: pd.DataFrame, domain_query: list, language_query: list, task: str = "qa") -> pd.DataFrame:
 
 
 
 
 
 
96
  cols, _ = get_default_cols(task=task, columns=df.columns, add_fix_cols=False)
97
  selected_cols = []
98
  for c in cols:
@@ -110,25 +116,41 @@ def select_columns(df: pd.DataFrame, domain_query: list, language_query: list, t
110
  filtered_df[COL_NAME_AVG] = filtered_df[selected_cols].apply(calculate_mean, axis=1).round(decimals=2)
111
  filtered_df.sort_values(by=[COL_NAME_AVG], ascending=False, inplace=True)
112
  filtered_df.reset_index(inplace=True, drop=True)
113
- filtered_df[COL_NAME_RANK] = filtered_df[COL_NAME_AVG].rank(ascending=False, method="min")
 
114
 
115
  return filtered_df
116
 
117
 
118
- def update_table(
 
119
  hidden_df: pd.DataFrame,
120
  domains: list,
121
  langs: list,
122
  reranking_query: list,
123
  query: str,
124
- show_anonymous: bool
 
125
  ):
126
  filtered_df = hidden_df.copy()
127
  if not show_anonymous:
128
  filtered_df = filtered_df[~filtered_df[COL_NAME_IS_ANONYMOUS]]
129
  filtered_df = filter_models(filtered_df, reranking_query)
130
  filtered_df = filter_queries(query, filtered_df)
131
- return select_columns(filtered_df, domains, langs, task='qa')
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
 
134
  def update_table_long_doc(
@@ -137,15 +159,11 @@ def update_table_long_doc(
137
  langs: list,
138
  reranking_query: list,
139
  query: str,
140
- show_anonymous: bool
 
141
  ):
142
- filtered_df = hidden_df
143
- if not show_anonymous:
144
- filtered_df = filtered_df[~filtered_df[COL_NAME_IS_ANONYMOUS]]
145
- filtered_df = filter_models(filtered_df, reranking_query)
146
- filtered_df = filter_queries(query, filtered_df)
147
- df = select_columns(filtered_df, domains, langs, task='long-doc')
148
- return df
149
 
150
 
151
  def update_metric(
 
92
  FIXED_COLS_TYPES = [c.type for _, _, c in fixed_cols]
93
 
94
 
95
+ def select_columns(
96
+ df: pd.DataFrame,
97
+ domain_query: list,
98
+ language_query: list,
99
+ task: str = "qa",
100
+ reset_ranking: bool = True
101
+ ) -> pd.DataFrame:
102
  cols, _ = get_default_cols(task=task, columns=df.columns, add_fix_cols=False)
103
  selected_cols = []
104
  for c in cols:
 
116
  filtered_df[COL_NAME_AVG] = filtered_df[selected_cols].apply(calculate_mean, axis=1).round(decimals=2)
117
  filtered_df.sort_values(by=[COL_NAME_AVG], ascending=False, inplace=True)
118
  filtered_df.reset_index(inplace=True, drop=True)
119
+ if reset_ranking:
120
+ filtered_df[COL_NAME_RANK] = filtered_df[COL_NAME_AVG].rank(ascending=False, method="min")
121
 
122
  return filtered_df
123
 
124
 
125
+ def _update_table(
126
+ task: str,
127
  hidden_df: pd.DataFrame,
128
  domains: list,
129
  langs: list,
130
  reranking_query: list,
131
  query: str,
132
+ show_anonymous: bool,
133
+ reset_ranking: bool = True
134
  ):
135
  filtered_df = hidden_df.copy()
136
  if not show_anonymous:
137
  filtered_df = filtered_df[~filtered_df[COL_NAME_IS_ANONYMOUS]]
138
  filtered_df = filter_models(filtered_df, reranking_query)
139
  filtered_df = filter_queries(query, filtered_df)
140
+ return select_columns(filtered_df, domains, langs, task, reset_ranking)
141
+
142
+
143
+ def update_table(
144
+ hidden_df: pd.DataFrame,
145
+ domains: list,
146
+ langs: list,
147
+ reranking_query: list,
148
+ query: str,
149
+ show_anonymous: bool,
150
+ reset_ranking: bool = True
151
+ ):
152
+ return _update_table(
153
+ "qa", hidden_df, domains, langs, reranking_query, query, show_anonymous, reset_ranking)
154
 
155
 
156
  def update_table_long_doc(
 
159
  langs: list,
160
  reranking_query: list,
161
  query: str,
162
+ show_anonymous: bool,
163
+ reset_ranking: bool = True
164
  ):
165
+ return _update_table(
166
+ "long-doc", hidden_df, domains, langs, reranking_query, query, show_anonymous, reset_ranking)
 
 
 
 
 
167
 
168
 
169
  def update_metric(