Spaces:
AIR-Bench
/
Running on CPU Upgrade

nan committed on
Commit
e050b39
·
1 Parent(s): 7845083

feat: implement the version selector for qa retrieval only

Browse files
Files changed (1) hide show
  1. app.py +24 -16
app.py CHANGED
@@ -14,19 +14,21 @@ from src.display.css_html_js import custom_css
14
  from src.envs import (
15
  API,
16
  EVAL_RESULTS_PATH,
17
- REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST, LATEST_BENCHMARK_VERSION
18
  )
19
  from src.loaders import (
20
  load_eval_results
21
  )
22
  from src.utils import (
23
  update_metric,
24
- set_listeners
 
25
  )
26
  from src.display.gradio_formatting import (
27
  get_version_dropdown,
28
  get_search_bar,
29
  get_reranking_dropdown,
 
30
  get_metric_dropdown,
31
  get_domain_dropdown,
32
  get_language_dropdown,
@@ -110,11 +112,9 @@ with demo:
110
  # select domain
111
  with gr.Row():
112
  selected_domains = get_domain_dropdown(QABenchmarks[datastore.slug])
113
- # selected_domains = get_domain_dropdown(QABenchmarks["2404"])
114
  # select language
115
  with gr.Row():
116
  selected_langs = get_language_dropdown(QABenchmarks[datastore.slug])
117
- # selected_langs = get_language_dropdown(QABenchmarks["2404"])
118
  with gr.Column():
119
  # select the metric
120
  selected_metric = get_metric_dropdown(METRIC_LIST, DEFAULT_METRIC_QA)
@@ -132,21 +132,22 @@ with demo:
132
  with gr.Column():
133
  selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
134
  # shown_table
135
- leaderboard_table = get_leaderboard_table(
136
  datastore.leaderboard_df_qa, datastore.types_qa)
137
  # Dummy leaderboard for handling the case when the user uses backspace key
138
- hidden_leaderboard_table_for_search = get_leaderboard_table(
139
  datastore.raw_df_qa, datastore.types_qa, visible=False)
140
 
141
  selected_version.change(
142
  update_datastore,
143
  [selected_version,],
144
- [selected_domains, selected_langs, selected_rerankings, leaderboard_table, hidden_leaderboard_table_for_search]
145
  )
 
146
  set_listeners(
147
  "qa",
148
- leaderboard_table,
149
- hidden_leaderboard_table_for_search,
150
  search_bar,
151
  selected_version,
152
  selected_domains,
@@ -168,30 +169,37 @@ with demo:
168
  show_anonymous,
169
  show_revision_and_timestamp,
170
  ],
171
- leaderboard_table,
172
  queue=True
173
  )
174
 
175
- """
176
  with gr.TabItem("Retrieval Only", id=11):
177
  with gr.Row():
178
  with gr.Column(scale=1):
179
  search_bar_retriever = get_search_bar()
180
  with gr.Column(scale=1):
181
  selected_noreranker = get_noreranking_dropdown()
182
- lb_df_retriever = data["AIR-Bench_24.04"].leaderboard_df_qa[data["AIR-Bench_24.04"].leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
 
183
  lb_df_retriever = reset_rank(lb_df_retriever)
184
- lb_table_retriever = get_leaderboard_table(lb_df_retriever, data["AIR-Bench_24.04"].types_qa)
185
  # Dummy leaderboard for handling the case when the user uses backspace key
186
- hidden_lb_df_retriever = data["AIR-Bench_24.04"].raw_df_qa[data["AIR-Bench_24.04"].raw_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
187
  hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
188
- hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_df_retriever, data["AIR-Bench_24.04"].types_qa, visible=False)
 
 
 
 
 
 
189
 
190
  set_listeners(
191
  "qa",
192
  lb_table_retriever,
193
  hidden_lb_table_retriever,
194
  search_bar_retriever,
 
195
  selected_domains,
196
  selected_langs,
197
  selected_noreranker,
@@ -210,11 +218,11 @@ with demo:
210
  search_bar_retriever,
211
  show_anonymous,
212
  show_revision_and_timestamp,
213
- selected_version,
214
  ],
215
  lb_table_retriever,
216
  queue=True
217
  )
 
218
  with gr.TabItem("Reranking Only", id=12):
219
  lb_df_reranker = data["AIR-Bench_24.04"].leaderboard_df_qa[data["AIR-Bench_24.04"].leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
220
  lb_df_reranker = reset_rank(lb_df_reranker)
 
14
  from src.envs import (
15
  API,
16
  EVAL_RESULTS_PATH,
17
+ REPO_ID, DEFAULT_METRIC_QA, DEFAULT_METRIC_LONG_DOC, METRIC_LIST, LATEST_BENCHMARK_VERSION, COL_NAME_RERANKING_MODEL
18
  )
19
  from src.loaders import (
20
  load_eval_results
21
  )
22
  from src.utils import (
23
  update_metric,
24
+ set_listeners,
25
+ reset_rank
26
  )
27
  from src.display.gradio_formatting import (
28
  get_version_dropdown,
29
  get_search_bar,
30
  get_reranking_dropdown,
31
+ get_noreranking_dropdown,
32
  get_metric_dropdown,
33
  get_domain_dropdown,
34
  get_language_dropdown,
 
112
  # select domain
113
  with gr.Row():
114
  selected_domains = get_domain_dropdown(QABenchmarks[datastore.slug])
 
115
  # select language
116
  with gr.Row():
117
  selected_langs = get_language_dropdown(QABenchmarks[datastore.slug])
 
118
  with gr.Column():
119
  # select the metric
120
  selected_metric = get_metric_dropdown(METRIC_LIST, DEFAULT_METRIC_QA)
 
132
  with gr.Column():
133
  selected_rerankings = get_reranking_dropdown(datastore.reranking_models)
134
  # shown_table
135
+ lb_table = get_leaderboard_table(
136
  datastore.leaderboard_df_qa, datastore.types_qa)
137
  # Dummy leaderboard for handling the case when the user uses backspace key
138
+ hidden_lb_table = get_leaderboard_table(
139
  datastore.raw_df_qa, datastore.types_qa, visible=False)
140
 
141
  selected_version.change(
142
  update_datastore,
143
  [selected_version,],
144
+ [selected_domains, selected_langs, selected_rerankings, lb_table, hidden_lb_table]
145
  )
146
+
147
  set_listeners(
148
  "qa",
149
+ lb_table,
150
+ hidden_lb_table,
151
  search_bar,
152
  selected_version,
153
  selected_domains,
 
169
  show_anonymous,
170
  show_revision_and_timestamp,
171
  ],
172
+ lb_table,
173
  queue=True
174
  )
175
 
 
176
  with gr.TabItem("Retrieval Only", id=11):
177
  with gr.Row():
178
  with gr.Column(scale=1):
179
  search_bar_retriever = get_search_bar()
180
  with gr.Column(scale=1):
181
  selected_noreranker = get_noreranking_dropdown()
182
+
183
+ lb_df_retriever = datastore.leaderboard_df_qa[datastore.leaderboard_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
184
  lb_df_retriever = reset_rank(lb_df_retriever)
185
+ lb_table_retriever = get_leaderboard_table(lb_df_retriever, datastore.types_qa)
186
  # Dummy leaderboard for handling the case when the user uses backspace key
187
+ hidden_lb_df_retriever = datastore.raw_df_qa[datastore.raw_df_qa[COL_NAME_RERANKING_MODEL] == "NoReranker"]
188
  hidden_lb_df_retriever = reset_rank(hidden_lb_df_retriever)
189
+ hidden_lb_table_retriever = get_leaderboard_table(hidden_lb_df_retriever, datastore.types_qa, visible=False)
190
+
191
+ selected_version.change(
192
+ update_datastore,
193
+ [selected_version,],
194
+ [selected_domains, selected_langs, selected_rerankings, lb_table_retriever, hidden_lb_table_retriever]
195
+ )
196
 
197
  set_listeners(
198
  "qa",
199
  lb_table_retriever,
200
  hidden_lb_table_retriever,
201
  search_bar_retriever,
202
+ selected_version,
203
  selected_domains,
204
  selected_langs,
205
  selected_noreranker,
 
218
  search_bar_retriever,
219
  show_anonymous,
220
  show_revision_and_timestamp,
 
221
  ],
222
  lb_table_retriever,
223
  queue=True
224
  )
225
+ """
226
  with gr.TabItem("Reranking Only", id=12):
227
  lb_df_reranker = data["AIR-Bench_24.04"].leaderboard_df_qa[data["AIR-Bench_24.04"].leaderboard_df_qa[COL_NAME_RETRIEVAL_MODEL] == BM25_LINK]
228
  lb_df_reranker = reset_rank(lb_df_reranker)