Spaces:
AIR-Bench
/
Running on CPU Upgrade

feat-add-reranker-tab-0607

#21
by nan - opened
Files changed (1) hide show
  1. app.py +103 -17
app.py CHANGED
@@ -11,10 +11,10 @@ from src.about import (
11
  from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, LANG_COLS_LONG_DOC, METRIC_LIST, \
12
  DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC
13
  from src.display.css_html_js import custom_css
14
- from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_RERANKING_MODEL
15
  from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
16
  from src.read_evals import get_raw_eval_results, get_leaderboard_df
17
- from src.utils import update_metric, upload_file, get_default_cols, submit_results, reset_rank
18
  from src.display.gradio_formatting import get_version_dropdown, get_search_bar, get_reranking_dropdown, \
19
  get_metric_dropdown, get_domain_dropdown, get_language_dropdown, get_anonymous_checkbox, get_revision_and_ts_checkbox, get_leaderboard_table, get_noreranking_dropdown
20
  from src.display.gradio_listener import set_listeners
@@ -108,9 +108,8 @@ with demo:
108
  show_anonymous = get_anonymous_checkbox()
109
  with gr.Row():
110
  show_revision_and_timestamp = get_revision_and_ts_checkbox()
111
-
112
  with gr.Tabs(elem_classes="tab-buttons") as sub_tabs:
113
- with gr.TabItem("Retriever + Reranker", id=10):
114
  with gr.Row():
115
  # search retrieval models
116
  with gr.Column():
@@ -149,17 +148,19 @@ with demo:
149
  leaderboard_table,
150
  queue=True
151
  )
152
- with gr.TabItem("Retriever Only", id=11):
153
- with gr.Column():
154
- search_bar_retriever = get_search_bar()
155
- selected_noreranker = get_noreranking_dropdown()
 
 
156
  lb_df_retriever = leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
157
  lb_df_retriever = reset_rank(lb_df_retriever)
158
- hidden_lb_db_retriever = original_df_qa[original_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
159
- hidden_lb_db_retriever = reset_rank(hidden_lb_db_retriever)
160
  lb_table_retriever = get_leaderboard_table(lb_df_retriever, types_qa)
161
  # Dummy leaderboard for handling the case when the user uses backspace key
162
- hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_db_retriever, types_qa, visible=False)
 
 
163
 
164
  set_listeners(
165
  "qa",
@@ -188,7 +189,48 @@ with demo:
188
  lb_table_retriever,
189
  queue=True
190
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  with gr.TabItem("Long Doc", elem_id="long-doc-benchmark-tab-table", id=1):
193
  with gr.Row():
194
  with gr.Column(min_width=320):
@@ -211,7 +253,7 @@ with demo:
211
  with gr.Row():
212
  show_revision_and_timestamp = get_revision_and_ts_checkbox()
213
  with gr.Tabs(elem_classes="tab-buttons") as sub_tabs:
214
- with gr.TabItem("Retriever + Reranker", id=20):
215
  with gr.Row():
216
  with gr.Column():
217
  search_bar = get_search_bar()
@@ -255,11 +297,12 @@ with demo:
255
  lb_table,
256
  queue=True
257
  )
258
-
259
- with gr.TabItem("Retriever Only", id=21):
260
- with gr.Column():
261
- search_bar_retriever = get_search_bar()
262
- selected_noreranker = get_noreranking_dropdown()
 
263
  lb_df_retriever_long_doc = leaderboard_df_long_doc[
264
  leaderboard_df_long_doc[COL_NAME_RERANKING_MODEL] == "NoReranker"
265
  ]
@@ -300,6 +343,49 @@ with demo:
300
  lb_table_retriever_long_doc,
301
  queue=True
302
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
 
304
  with gr.TabItem("🚀Submit here!", elem_id="submit-tab-table", id=2):
305
  with gr.Column():
 
11
  from src.benchmarks import DOMAIN_COLS_QA, LANG_COLS_QA, DOMAIN_COLS_LONG_DOC, LANG_COLS_LONG_DOC, METRIC_LIST, \
12
  DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC
13
  from src.display.css_html_js import custom_css
14
+ from src.display.utils import COL_NAME_IS_ANONYMOUS, COL_NAME_REVISION, COL_NAME_TIMESTAMP, COL_NAME_RERANKING_MODEL, COL_NAME_RETRIEVAL_MODEL
15
  from src.envs import API, EVAL_RESULTS_PATH, REPO_ID, RESULTS_REPO, TOKEN
16
  from src.read_evals import get_raw_eval_results, get_leaderboard_df
17
+ from src.utils import update_metric, upload_file, get_default_cols, submit_results, reset_rank, remove_html
18
  from src.display.gradio_formatting import get_version_dropdown, get_search_bar, get_reranking_dropdown, \
19
  get_metric_dropdown, get_domain_dropdown, get_language_dropdown, get_anonymous_checkbox, get_revision_and_ts_checkbox, get_leaderboard_table, get_noreranking_dropdown
20
  from src.display.gradio_listener import set_listeners
 
108
  show_anonymous = get_anonymous_checkbox()
109
  with gr.Row():
110
  show_revision_and_timestamp = get_revision_and_ts_checkbox()
 
111
  with gr.Tabs(elem_classes="tab-buttons") as sub_tabs:
112
+ with gr.TabItem("Retrieval + Reranking", id=10):
113
  with gr.Row():
114
  # search retrieval models
115
  with gr.Column():
 
148
  leaderboard_table,
149
  queue=True
150
  )
151
+ with gr.TabItem("Retrieval Only", id=11):
152
+ with gr.Row():
153
+ with gr.Column(scale=1):
154
+ search_bar_retriever = get_search_bar()
155
+ with gr.Column(scale=1):
156
+ selected_noreranker = get_noreranking_dropdown()
157
  lb_df_retriever = leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
158
  lb_df_retriever = reset_rank(lb_df_retriever)
 
 
159
  lb_table_retriever = get_leaderboard_table(lb_df_retriever, types_qa)
160
  # Dummy leaderboard for handling the case when the user uses backspace key
161
+ hidden_lb_df_retriever = original_df_qa[original_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
162
+ hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
163
+ hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_df_retriever, types_qa, visible=False)
164
 
165
  set_listeners(
166
  "qa",
 
189
  lb_table_retriever,
190
  queue=True
191
  )
192
+ with gr.TabItem("Reranking Only", id=12):
193
+ lb_df_reranker = leaderboard_df_qa[leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == "BM25"]
194
+ lb_df_reranker = reset_rank(lb_df_reranker)
195
+ reranking_models_reranker = lb_df_reranker[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
196
+ with gr.Row():
197
+ with gr.Column(scale=1):
198
+ selected_rerankings_reranker = get_reranking_dropdown(reranking_models_reranker)
199
+ with gr.Column(scale=1):
200
+ search_bar_reranker = gr.Textbox(show_label=False, visible=False)
201
+ lb_table_reranker = get_leaderboard_table(lb_df_reranker, types_qa)
202
+ hidden_lb_df_reranker = original_df_qa[original_df_qa[COL_NAME_RETRIEVAL_MODEL] == "BM25"]
203
+ hidden_lb_df_reranker = reset_rank(hidden_lb_df_reranker)
204
+ hidden_lb_table_reranker = get_leaderboard_table(
205
+ hidden_lb_df_reranker, types_qa, visible=False
206
+ )
207
 
208
+ set_listeners(
209
+ "qa",
210
+ lb_table_reranker,
211
+ hidden_lb_table_reranker,
212
+ search_bar_reranker,
213
+ selected_domains,
214
+ selected_langs,
215
+ selected_rerankings_reranker,
216
+ show_anonymous,
217
+ show_revision_and_timestamp,
218
+ )
219
+ # set metric listener
220
+ selected_metric.change(
221
+ update_metric_qa,
222
+ [
223
+ selected_metric,
224
+ selected_domains,
225
+ selected_langs,
226
+ selected_rerankings_reranker,
227
+ search_bar_reranker,
228
+ show_anonymous,
229
+ show_revision_and_timestamp,
230
+ ],
231
+ lb_table_reranker,
232
+ queue=True
233
+ )
234
  with gr.TabItem("Long Doc", elem_id="long-doc-benchmark-tab-table", id=1):
235
  with gr.Row():
236
  with gr.Column(min_width=320):
 
253
  with gr.Row():
254
  show_revision_and_timestamp = get_revision_and_ts_checkbox()
255
  with gr.Tabs(elem_classes="tab-buttons") as sub_tabs:
256
+ with gr.TabItem("Retrieval + Reranking", id=20):
257
  with gr.Row():
258
  with gr.Column():
259
  search_bar = get_search_bar()
 
297
  lb_table,
298
  queue=True
299
  )
300
+ with gr.TabItem("Retrieval Only", id=21):
301
+ with gr.Row():
302
+ with gr.Column(scale=1):
303
+ search_bar_retriever = get_search_bar()
304
+ with gr.Column(scale=1):
305
+ selected_noreranker = get_noreranking_dropdown()
306
  lb_df_retriever_long_doc = leaderboard_df_long_doc[
307
  leaderboard_df_long_doc[COL_NAME_RERANKING_MODEL] == "NoReranker"
308
  ]
 
343
  lb_table_retriever_long_doc,
344
  queue=True
345
  )
346
+ with gr.TabItem("Reranking Only", id=22):
347
+ lb_df_reranker_ldoc = leaderboard_df_long_doc[
348
+ leaderboard_df_long_doc[COL_NAME_RETRIEVAL_MODEL] == "BM25"
349
+ ]
350
+ lb_df_reranker_ldoc = reset_rank(lb_df_reranker_ldoc)
351
+ reranking_models_reranker_ldoc = lb_df_reranker_ldoc[COL_NAME_RERANKING_MODEL].apply(remove_html).unique().tolist()
352
+ with gr.Row():
353
+ with gr.Column(scale=1):
354
+ selected_rerankings_reranker_ldoc = get_reranking_dropdown(reranking_models_reranker_ldoc)
355
+ with gr.Column(scale=1):
356
+ search_bar_reranker_ldoc = gr.Textbox(show_label=False, visible=False)
357
+ lb_table_reranker_ldoc = get_leaderboard_table(lb_df_reranker_ldoc, types_long_doc)
358
+ hidden_lb_df_reranker_ldoc = original_df_long_doc[original_df_long_doc[COL_NAME_RETRIEVAL_MODEL] == "BM25"]
359
+ hidden_lb_df_reranker_ldoc = reset_rank(hidden_lb_df_reranker_ldoc)
360
+ hidden_lb_table_reranker_ldoc = get_leaderboard_table(
361
+ hidden_lb_df_reranker_ldoc, types_long_doc, visible=False
362
+ )
363
+
364
+ set_listeners(
365
+ "long-doc",
366
+ lb_table_reranker_ldoc,
367
+ hidden_lb_table_reranker_ldoc,
368
+ search_bar_reranker_ldoc,
369
+ selected_domains,
370
+ selected_langs,
371
+ selected_rerankings_reranker_ldoc,
372
+ show_anonymous,
373
+ show_revision_and_timestamp,
374
+ )
375
+ selected_metric.change(
376
+ update_metric_long_doc,
377
+ [
378
+ selected_metric,
379
+ selected_domains,
380
+ selected_langs,
381
+ selected_rerankings_reranker_ldoc,
382
+ search_bar_reranker_ldoc,
383
+ show_anonymous,
384
+ show_revision_and_timestamp,
385
+ ],
386
+ lb_table_reranker_ldoc,
387
+ queue=True
388
+ )
389
 
390
  with gr.TabItem("🚀Submit here!", elem_id="submit-tab-table", id=2):
391
  with gr.Column():