cdminix committed on
Commit
4bc7be5
·
verified ·
1 Parent(s): adc647c

add option to exclude environment from mean

Browse files
Files changed (1) hide show
  1. app.py +15 -1
app.py CHANGED
@@ -28,6 +28,15 @@ def filter_dfs(tags, lb):
28
  lb = lb[lb["Tags"].apply(lambda x: any(tag in x for tag in tags))]
29
  return lb
30
 
 
 
 
 
 
 
 
 
 
31
 
32
  def restart_space():
33
  API.restart_space(repo_id=REPO_ID)
@@ -129,7 +138,8 @@ agg_df = BenchmarkSuite.aggregate_df(results_df)
129
  agg_df = agg_df.pivot(index="dataset", columns="benchmark_category", values="score")
130
  agg_df.rename(columns={"OVERALL": "General"}, inplace=True)
131
  agg_df.columns = [x.capitalize() for x in agg_df.columns]
132
- agg_df["Mean"] = agg_df.mean(axis=1)
 
133
  # make sure mean is the first column
134
  agg_df = agg_df[["Mean"] + [col for col in agg_df.columns if col != "Mean"]]
135
  for col in agg_df.columns:
@@ -212,6 +222,9 @@ app = gr.Blocks(css=custom_css, title="TTS Benchmark Leaderboard")
212
  with app:
213
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
214
  with gr.TabItem("🏅 TTSDB Scores", elem_id="llm-benchmark-tab-table", id=0):
 
 
 
215
  tags = gr.Dropdown(
216
  TAGS,
217
  value=[],
@@ -221,6 +234,7 @@ with app:
221
  )
222
  leaderboard = init_leaderboard(f_a_df)
223
  tags.change(filter_dfs, [tags, leaderboard], [leaderboard])
 
224
  with gr.TabItem("🏅 Individual Benchmarks", elem_id="llm-benchmark-tab-table", id=1):
225
  tags = gr.Dropdown(
226
  TAGS,
 
28
  lb = lb[lb["Tags"].apply(lambda x: any(tag in x for tag in tags))]
29
  return lb
30
 
31
+ def change_mean(env, lb):
32
+ global f_b_df, f_a_df
33
+ lb = f_a_df.copy()
34
+ if env:
35
+ mean_cols = [col for col in lb.columns if str(col) not in ["Mean", "Environment", "Model", "Tags"]]
36
+ else:
37
+ mean_cols = [col for col in lb.columns if str(col) not in ["Mean", "Model", "Tags"]]
38
+ lb["Mean"] = lb[mean_cols].mean(axis=1)
39
+ return lb
40
 
41
  def restart_space():
42
  API.restart_space(repo_id=REPO_ID)
 
138
  agg_df = agg_df.pivot(index="dataset", columns="benchmark_category", values="score")
139
  agg_df.rename(columns={"OVERALL": "General"}, inplace=True)
140
  agg_df.columns = [x.capitalize() for x in agg_df.columns]
141
+ mean_cols = [col for col in agg_df.columns if str(col) not in ["Mean", "Environment", "Model", "Tags"]]
142
+ agg_df["Mean"] = agg_df[mean_cols].mean(axis=1)
143
  # make sure mean is the first column
144
  agg_df = agg_df[["Mean"] + [col for col in agg_df.columns if col != "Mean"]]
145
  for col in agg_df.columns:
 
222
  with app:
223
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
224
  with gr.TabItem("🏅 TTSDB Scores", elem_id="llm-benchmark-tab-table", id=0):
225
+ with gr.Group():
226
+ env = gr.Checkbox(value=True, label="Exclude environment from mean.")
227
+ gr.Markdown("**Environment** measures how well the system can reproduce noise in the training data. This doesn't correlate with human judgements for 'naturalness'")
228
  tags = gr.Dropdown(
229
  TAGS,
230
  value=[],
 
234
  )
235
  leaderboard = init_leaderboard(f_a_df)
236
  tags.change(filter_dfs, [tags, leaderboard], [leaderboard])
237
+ env.change(change_mean, [env, leaderboard], [leaderboard])
238
  with gr.TabItem("🏅 Individual Benchmarks", elem_id="llm-benchmark-tab-table", id=1):
239
  tags = gr.Dropdown(
240
  TAGS,