Spaces:
Running
Running
ZhangYuhan
committed on
Commit
•
49574e5
1
Parent(s):
d90c0f8
update leaderboard
Browse files
- serve/leaderboard.py +46 -38
serve/leaderboard.py
CHANGED
@@ -95,7 +95,7 @@ def load_leaderboard_table_csv(filename, add_hyperlink=True):
|
|
95 |
df = df.drop(df[df["Key"].isnull()].index)
|
96 |
for col in df.columns:
|
97 |
if "Elo rating" in col:
|
98 |
-
print(col, df[col], type(df[col]), df[col] is not np.NaN)
|
99 |
df[col] = df[col].apply(lambda x: int(x) if (x != "-" and pd.notna(x)) else np.NaN)
|
100 |
|
101 |
if add_hyperlink and col == "Model":
|
@@ -174,10 +174,10 @@ def get_arena_table(arena_dfs, model_table_df):
|
|
174 |
# elo rating
|
175 |
num_battles = 0
|
176 |
for dim in arena_dfs.keys():
|
177 |
-
try:
|
178 |
-
|
179 |
-
except:
|
180 |
-
|
181 |
row.append(round(arena_dfs[dim].loc[model_name]["rating"], 2))
|
182 |
upper_diff = round(arena_dfs[dim].loc[model_name]["rating_q975"] - arena_dfs[dim].loc[model_name]["rating"])
|
183 |
lower_diff = round(arena_dfs[dim].loc[model_name]["rating"] - arena_dfs[dim].loc[model_name]["rating_q025"])
|
@@ -215,25 +215,26 @@ def make_arena_leaderboard_md(elo_results):
|
|
215 |
total_models = len(arena_df)
|
216 |
|
217 |
leaderboard_md = f"""
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
(Note: Only anonymous votes are considered here.)
|
222 |
-
|
223 |
Contribute the votes 🗳️ at [3DGen-Arena](https://huggingface.co/spaces/ZhangYuhan/3DGen-Arena)!
|
224 |
-
|
225 |
"""
|
226 |
return leaderboard_md
|
227 |
|
228 |
def make_full_leaderboard_md(elo_results):
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
|
|
|
|
233 |
|
234 |
leaderboard_md = f"""
|
235 |
-
Total #models: **{total_models}
|
236 |
-
|
|
|
|
|
237 |
"""
|
238 |
return leaderboard_md
|
239 |
|
@@ -251,7 +252,7 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
|
|
251 |
else:
|
252 |
with open(elo_results_file, "rb") as fin:
|
253 |
elo_results = pickle.load(fin)
|
254 |
-
|
255 |
# print(elo_results)
|
256 |
# print(elo_results.keys())
|
257 |
anony_elo_results, full_elo_results = {}, {}
|
@@ -266,6 +267,8 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
|
|
266 |
p2[dim] = anony_elo_results[dim]["battle_count_heatmap"]
|
267 |
p3[dim] = anony_elo_results[dim]["bootstrap_elo_rating"]
|
268 |
p4[dim] = anony_elo_results[dim]["average_win_rate_bar"]
|
|
|
|
|
269 |
|
270 |
md = make_leaderboard_md(anony_elo_results)
|
271 |
|
@@ -273,10 +276,11 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
|
|
273 |
|
274 |
if leaderboard_table_file:
|
275 |
model_table_df = load_leaderboard_table_csv(leaderboard_table_file)
|
|
|
276 |
with gr.Tabs() as tabs:
|
277 |
# arena table
|
278 |
arena_table_vals = get_arena_table(anony_arena_dfs, model_table_df)
|
279 |
-
with gr.Tab("Arena
|
280 |
md = make_arena_leaderboard_md(anony_elo_results)
|
281 |
gr.Markdown(md, elem_id="leaderboard_markdown")
|
282 |
gr.Dataframe(
|
@@ -308,25 +312,29 @@ def build_leaderboard_tab(elo_results_file, leaderboard_table_file, show_plot=Fa
|
|
308 |
column_widths=[50, 200, 100, 100, 100, 100, 100, 100, 100],
|
309 |
wrap=True,
|
310 |
)
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
318 |
-
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
|
|
|
|
|
|
|
|
330 |
if not show_plot:
|
331 |
gr.Markdown(
|
332 |
""" ## We are still collecting more votes on more models. The ranking will be updated very fruquently. Please stay tuned!
|
|
|
95 |
df = df.drop(df[df["Key"].isnull()].index)
|
96 |
for col in df.columns:
|
97 |
if "Elo rating" in col:
|
98 |
+
# print(col, df[col], type(df[col]), df[col] is not np.NaN)
|
99 |
df[col] = df[col].apply(lambda x: int(x) if (x != "-" and pd.notna(x)) else np.NaN)
|
100 |
|
101 |
if add_hyperlink and col == "Model":
|
|
|
174 |
# elo rating
|
175 |
num_battles = 0
|
176 |
for dim in arena_dfs.keys():
|
177 |
+
# try:
|
178 |
+
# print(arena_dfs[dim].loc[model_name])
|
179 |
+
# except:
|
180 |
+
# continue
|
181 |
row.append(round(arena_dfs[dim].loc[model_name]["rating"], 2))
|
182 |
upper_diff = round(arena_dfs[dim].loc[model_name]["rating_q975"] - arena_dfs[dim].loc[model_name]["rating"])
|
183 |
lower_diff = round(arena_dfs[dim].loc[model_name]["rating"] - arena_dfs[dim].loc[model_name]["rating_q025"])
|
|
|
215 |
total_models = len(arena_df)
|
216 |
|
217 |
leaderboard_md = f"""
|
218 |
+
Total #models: **{total_models}**. \n
|
219 |
+
Total #votes: **{int(total_votes)}** (Anonymous Votes only). \n
|
220 |
+
Last updated: {last_updated}. \n
|
|
|
|
|
221 |
Contribute the votes 🗳️ at [3DGen-Arena](https://huggingface.co/spaces/ZhangYuhan/3DGen-Arena)!
|
|
|
222 |
"""
|
223 |
return leaderboard_md
|
224 |
|
225 |
def make_full_leaderboard_md(elo_results):
|
226 |
+
total_votes = 0
|
227 |
+
for dim in elo_results.keys():
|
228 |
+
arena_df = elo_results[dim]["leaderboard_table_df"]
|
229 |
+
last_updated = elo_results[dim]["last_updated_datetime"]
|
230 |
+
total_votes += sum(arena_df["num_battles"].fillna(0)) // 2
|
231 |
+
total_models = len(arena_df)
|
232 |
|
233 |
leaderboard_md = f"""
|
234 |
+
Total #models: **{total_models}**. \n
|
235 |
+
Total #votes: **{int(total_votes)}** (Anonymous + Named Votes). \n
|
236 |
+
Last updated: {last_updated}.\n
|
237 |
+
Contribute the votes 🗳️ at [3DGen-Arena](https://huggingface.co/spaces/ZhangYuhan/3DGen-Arena)!
|
238 |
"""
|
239 |
return leaderboard_md
|
240 |
|
|
|
252 |
else:
|
253 |
with open(elo_results_file, "rb") as fin:
|
254 |
elo_results = pickle.load(fin)
|
255 |
+
|
256 |
# print(elo_results)
|
257 |
# print(elo_results.keys())
|
258 |
anony_elo_results, full_elo_results = {}, {}
|
|
|
267 |
p2[dim] = anony_elo_results[dim]["battle_count_heatmap"]
|
268 |
p3[dim] = anony_elo_results[dim]["bootstrap_elo_rating"]
|
269 |
p4[dim] = anony_elo_results[dim]["average_win_rate_bar"]
|
270 |
+
print(anony_arena_dfs[dim])
|
271 |
+
print(full_arena_dfs[dim])
|
272 |
|
273 |
md = make_leaderboard_md(anony_elo_results)
|
274 |
|
|
|
276 |
|
277 |
if leaderboard_table_file:
|
278 |
model_table_df = load_leaderboard_table_csv(leaderboard_table_file)
|
279 |
+
model_table_df_full = load_leaderboard_table_csv(str(leaderboard_table_file).replace('.csv', '_full.csv'))
|
280 |
with gr.Tabs() as tabs:
|
281 |
# arena table
|
282 |
arena_table_vals = get_arena_table(anony_arena_dfs, model_table_df)
|
283 |
+
with gr.Tab("Anony. Arena", id=0):
|
284 |
md = make_arena_leaderboard_md(anony_elo_results)
|
285 |
gr.Markdown(md, elem_id="leaderboard_markdown")
|
286 |
gr.Dataframe(
|
|
|
312 |
column_widths=[50, 200, 100, 100, 100, 100, 100, 100, 100],
|
313 |
wrap=True,
|
314 |
)
|
315 |
+
with gr.Tab("Full Arena", id=1):
|
316 |
+
md = make_full_leaderboard_md(full_elo_results)
|
317 |
+
gr.Markdown(md, elem_id="leaderboard_markdown")
|
318 |
+
full_table_vals = get_arena_table(full_arena_dfs, model_table_df_full)
|
319 |
+
gr.Dataframe(
|
320 |
+
headers=["Rank", "🤖 Model"] + [f"📈 {dim} Elo" for dim in anony_arena_dfs.keys()] + ["⭐ Avg. Arena Elo Ranking", "📮 Votes"],
|
321 |
+
datatype=[
|
322 |
+
"str",
|
323 |
+
"markdown",
|
324 |
+
"number",
|
325 |
+
"number",
|
326 |
+
"number",
|
327 |
+
"number",
|
328 |
+
"number",
|
329 |
+
"number",
|
330 |
+
"number"
|
331 |
+
],
|
332 |
+
value=full_table_vals,
|
333 |
+
elem_id="full_leaderboard_dataframe",
|
334 |
+
column_widths=[50, 200, 100, 100, 100, 100, 100, 100, 100],
|
335 |
+
height=700,
|
336 |
+
wrap=True,
|
337 |
+
)
|
338 |
if not show_plot:
|
339 |
gr.Markdown(
|
340 |
""" ## We are still collecting more votes on more models. The ranking will be updated very fruquently. Please stay tuned!
|