ZhangYuhan committed on
Commit
49574e5
1 Parent(s): d90c0f8

update leaderboard

Browse files
Files changed (1) hide show
  1. serve/leaderboard.py +46 -38
serve/leaderboard.py CHANGED
@@ -95,7 +95,7 @@ def load_leaderboard_table_csv(filename, add_hyperlink=True):
95
  df = df.drop(df[df["Key"].isnull()].index)
96
  for col in df.columns:
97
  if "Elo rating" in col:
98
- print(col, df[col], type(df[col]), df[col] is not np.NaN)
99
  df[col] = df[col].apply(lambda x: int(x) if (x != "-" and pd.notna(x)) else np.NaN)
100
 
101
  if add_hyperlink and col == "Model":
@@ -174,10 +174,10 @@ def get_arena_table(arena_dfs, model_table_df):
174
  # elo rating
175
  num_battles = 0
176
  for dim in arena_dfs.keys():
177
- try:
178
- print(arena_dfs[dim].loc[model_name])
179
- except:
180
- continue
181
  row.append(round(arena_dfs[dim].loc[model_name]["rating"], 2))
182
  upper_diff = round(arena_dfs[dim].loc[model_name]["rating_q975"] - arena_dfs[dim].loc[model_name]["rating"])
183
  lower_diff = round(arena_dfs[dim].loc[model_name]["rating"] - arena_dfs[dim].loc[model_name]["rating_q025"])
@@ -215,25 +215,26 @@ def make_arena_leaderboard_md(elo_results):
215
  total_models = len(arena_df)
216
 
217
  leaderboard_md = f"""
218
-
219
-
220
- Total #models: **{total_models}**(anonymous). Total #votes: **{int(total_votes)}**. Last updated: {last_updated}. \n
221
- (Note: Only anonymous votes are considered here.)
222
-
223
  Contribute the votes 🗳️ at [3DGen-Arena](https://huggingface.co/spaces/ZhangYuhan/3DGen-Arena)!
224
-
225
  """
226
  return leaderboard_md
227
 
228
  def make_full_leaderboard_md(elo_results):
229
- arena_df = elo_results["leaderboard_table_df"]
230
- last_updated = elo_results["last_updated_datetime"]
231
- total_votes = sum(arena_df["num_battles"]) // 2
232
- total_models = len(arena_df)
 
 
233
 
234
  leaderboard_md = f"""
235
- Total #models: **{total_models}**(full:anonymous+open). Total #votes: **{total_votes}**. Last updated: {last_updated}.
236
-
 
 
237
  """
238
  return leaderboard_md
239
 
@@ -251,7 +252,7 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
251
  else:
252
  with open(elo_results_file, "rb") as fin:
253
  elo_results = pickle.load(fin)
254
-
255
  # print(elo_results)
256
  # print(elo_results.keys())
257
  anony_elo_results, full_elo_results = {}, {}
@@ -266,6 +267,8 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
266
  p2[dim] = anony_elo_results[dim]["battle_count_heatmap"]
267
  p3[dim] = anony_elo_results[dim]["bootstrap_elo_rating"]
268
  p4[dim] = anony_elo_results[dim]["average_win_rate_bar"]
 
 
269
 
270
  md = make_leaderboard_md(anony_elo_results)
271
 
@@ -273,10 +276,11 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
273
 
274
  if leaderboard_table_file:
275
  model_table_df = load_leaderboard_table_csv(leaderboard_table_file)
 
276
  with gr.Tabs() as tabs:
277
  # arena table
278
  arena_table_vals = get_arena_table(anony_arena_dfs, model_table_df)
279
- with gr.Tab("Arena Elo", id=0):
280
  md = make_arena_leaderboard_md(anony_elo_results)
281
  gr.Markdown(md, elem_id="leaderboard_markdown")
282
  gr.Dataframe(
@@ -308,25 +312,29 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
308
  column_widths=[50, 200, 100, 100, 100, 100, 100, 100, 100],
309
  wrap=True,
310
  )
311
- # with gr.Tab("Full Leaderboard", id=1):
312
- # md = make_full_leaderboard_md(full_elo_results)
313
- # gr.Markdown(md, elem_id="leaderboard_markdown")
314
- # full_table_vals = get_full_table(anony_arena_df, full_arena_df, model_table_df)
315
- # gr.Dataframe(
316
- # headers=[
317
- # "🤖 Model",
318
- # "⭐ Arena Elo (anony)",
319
- # "⭐ Arena Elo (full)",
320
- # "Organization",
321
- # "License",
322
- # ],
323
- # datatype=["markdown", "number", "number", "str", "str"],
324
- # value=full_table_vals,
325
- # elem_id="full_leaderboard_dataframe",
326
- # column_widths=[200, 100, 100, 100, 150, 150],
327
- # height=700,
328
- # wrap=True,
329
- # )
 
 
 
 
330
  if not show_plot:
331
  gr.Markdown(
332
  """ ## We are still collecting more votes on more models. The ranking will be updated very fruquently. Please stay tuned!
 
95
  df = df.drop(df[df["Key"].isnull()].index)
96
  for col in df.columns:
97
  if "Elo rating" in col:
98
+ # print(col, df[col], type(df[col]), df[col] is not np.NaN)
99
  df[col] = df[col].apply(lambda x: int(x) if (x != "-" and pd.notna(x)) else np.NaN)
100
 
101
  if add_hyperlink and col == "Model":
 
174
  # elo rating
175
  num_battles = 0
176
  for dim in arena_dfs.keys():
177
+ # try:
178
+ # print(arena_dfs[dim].loc[model_name])
179
+ # except:
180
+ # continue
181
  row.append(round(arena_dfs[dim].loc[model_name]["rating"], 2))
182
  upper_diff = round(arena_dfs[dim].loc[model_name]["rating_q975"] - arena_dfs[dim].loc[model_name]["rating"])
183
  lower_diff = round(arena_dfs[dim].loc[model_name]["rating"] - arena_dfs[dim].loc[model_name]["rating_q025"])
 
215
  total_models = len(arena_df)
216
 
217
  leaderboard_md = f"""
218
+ Total #models: **{total_models}**. \n
219
+ Total #votes: **{int(total_votes)}** (Anonymous Votes only). \n
220
+ Last updated: {last_updated}. \n
 
 
221
  Contribute the votes 🗳️ at [3DGen-Arena](https://huggingface.co/spaces/ZhangYuhan/3DGen-Arena)!
 
222
  """
223
  return leaderboard_md
224
 
225
  def make_full_leaderboard_md(elo_results):
226
+ total_votes = 0
227
+ for dim in elo_results.keys():
228
+ arena_df = elo_results[dim]["leaderboard_table_df"]
229
+ last_updated = elo_results[dim]["last_updated_datetime"]
230
+ total_votes += sum(arena_df["num_battles"].fillna(0)) // 2
231
+ total_models = len(arena_df)
232
 
233
  leaderboard_md = f"""
234
+ Total #models: **{total_models}**. \n
235
+ Total #votes: **{int(total_votes)}** (Anonymous + Named Votes). \n
236
+ Last updated: {last_updated}.\n
237
+ Contribute the votes 🗳️ at [3DGen-Arena](https://huggingface.co/spaces/ZhangYuhan/3DGen-Arena)!
238
  """
239
  return leaderboard_md
240
 
 
252
  else:
253
  with open(elo_results_file, "rb") as fin:
254
  elo_results = pickle.load(fin)
255
+
256
  # print(elo_results)
257
  # print(elo_results.keys())
258
  anony_elo_results, full_elo_results = {}, {}
 
267
  p2[dim] = anony_elo_results[dim]["battle_count_heatmap"]
268
  p3[dim] = anony_elo_results[dim]["bootstrap_elo_rating"]
269
  p4[dim] = anony_elo_results[dim]["average_win_rate_bar"]
270
+ print(anony_arena_dfs[dim])
271
+ print(full_arena_dfs[dim])
272
 
273
  md = make_leaderboard_md(anony_elo_results)
274
 
 
276
 
277
  if leaderboard_table_file:
278
  model_table_df = load_leaderboard_table_csv(leaderboard_table_file)
279
+ model_table_df_full = load_leaderboard_table_csv(str(leaderboard_table_file).replace('.csv', '_full.csv'))
280
  with gr.Tabs() as tabs:
281
  # arena table
282
  arena_table_vals = get_arena_table(anony_arena_dfs, model_table_df)
283
+ with gr.Tab("Anony. Arena", id=0):
284
  md = make_arena_leaderboard_md(anony_elo_results)
285
  gr.Markdown(md, elem_id="leaderboard_markdown")
286
  gr.Dataframe(
 
312
  column_widths=[50, 200, 100, 100, 100, 100, 100, 100, 100],
313
  wrap=True,
314
  )
315
+ with gr.Tab("Full Arena", id=1):
316
+ md = make_full_leaderboard_md(full_elo_results)
317
+ gr.Markdown(md, elem_id="leaderboard_markdown")
318
+ full_table_vals = get_arena_table(full_arena_dfs, model_table_df_full)
319
+ gr.Dataframe(
320
+ headers=["Rank", "🤖 Model"] + [f"📈 {dim} Elo" for dim in anony_arena_dfs.keys()] + ["⭐ Avg. Arena Elo Ranking", "📮 Votes"],
321
+ datatype=[
322
+ "str",
323
+ "markdown",
324
+ "number",
325
+ "number",
326
+ "number",
327
+ "number",
328
+ "number",
329
+ "number",
330
+ "number"
331
+ ],
332
+ value=full_table_vals,
333
+ elem_id="full_leaderboard_dataframe",
334
+ column_widths=[50, 200, 100, 100, 100, 100, 100, 100, 100],
335
+ height=700,
336
+ wrap=True,
337
+ )
338
  if not show_plot:
339
  gr.Markdown(
340
  """ ## We are still collecting more votes on more models. The ranking will be updated very fruquently. Please stay tuned!