Spaces:
Sleeping
Sleeping
kz209
committed on
Commit
•
0b41ab5
1
Parent(s):
899db21
create leaderboard
Browse files
- app.py +4 -5
- pages/leaderboard.py +52 -0
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
|
3 |
from pages.arena import create_arena
|
4 |
from pages.summarization_example import create_summarization_interface
|
|
|
5 |
|
6 |
def welcome_message():
|
7 |
return """
|
@@ -25,11 +26,9 @@ with gr.Blocks() as demo:
|
|
25 |
create_arena()
|
26 |
with gr.TabItem("Summarization"):
|
27 |
create_summarization_interface()
|
28 |
-
|
29 |
-
|
30 |
|
31 |
|
32 |
if __name__ == "__main__":
|
33 |
-
demo.launch(
|
34 |
-
|
35 |
-
#iface.launch() # launch the Gradio app
|
|
|
2 |
|
3 |
from pages.arena import create_arena
|
4 |
from pages.summarization_example import create_summarization_interface
|
5 |
+
from pages.leaderboard import create_leaderboard
|
6 |
|
7 |
def welcome_message():
|
8 |
return """
|
|
|
26 |
create_arena()
|
27 |
with gr.TabItem("Summarization"):
|
28 |
create_summarization_interface()
|
29 |
+
with gr.TabItem("Leaderboard"):
|
30 |
+
create_leaderboard()
|
31 |
|
32 |
|
33 |
if __name__ == "__main__":
|
34 |
+
demo.launch()
|
|
|
|
pages/leaderboard.py
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
+
|
4 |
+
# Sample data for the leaderboard. Every value is a placeholder; the rows are
# listed in their canonical rank order (Rank 1 first).
data = {
    'Rank': [1, 2, 3, 4, 5],
    'Methods': ['METHOD1_PLACEHOLDER', 'METHOD2_PLACEHOLDER', 'METHOD3_PLACEHOLDER', 'METHOD4_PLACEHOLDER', 'METHOD5_PLACEHOLDER'],
    'METRIC1_PLACEHOLDER Score': [1287, 1272, 1267, 1262, 1258],
    'METRIC2_PLACEHOLDER Score': [56905, 24913, 42981, 49828, 55567],
    'METRIC3_PLACEHOLDER Score': [3423, 3423, 2152, 4353, 2342],
    'Authors': ['AUTHOR1_PLACEHOLDER', 'AUTHOR2_PLACEHOLDER', 'AUTHOR3_PLACEHOLDER', 'AUTHOR4_PLACEHOLDER', 'AUTHOR5_PLACEHOLDER'],
}

df = pd.DataFrame(data)


def update_leaderboard(sort_by):
    """Render the leaderboard as an HTML table ordered by ``sort_by``.

    Args:
        sort_by: Name of the ``df`` column to order by. Anything that is not
            a column of ``df`` (for example ``None`` from a cleared dropdown)
            falls back to ``'Rank'``.

    Returns:
        str: The table HTML. The ``Rank`` column is renumbered 1..n to match
        the displayed order, and each header cell is wrapped in a link that
        calls ``sortBy('<column>')``.
    """
    if sort_by not in df.columns:
        sort_by = 'Rank'

    # Rank 1 is the best entry, so 'Rank' must sort ascending; sorting it
    # descending and then renumbering would label the last-place method as
    # Rank 1. Every other column keeps the original largest-first order.
    ascending = sort_by == 'Rank'

    # sort_values returns a new frame, so the module-level df is not mutated.
    # A stable sort keeps tied values in a deterministic order.
    sorted_df = df.sort_values(by=sort_by, ascending=ascending, kind='stable')

    # Update ranks based on new sorting
    sorted_df['Rank'] = range(1, len(sorted_df) + 1)

    html = sorted_df.to_html(index=False, escape=False)

    # Add sorting links to column headers.
    # NOTE(review): no JavaScript function named sortBy is defined in this
    # file; unless the host page provides one, these links do nothing and
    # sorting is driven only by the dropdown - confirm or remove the links.
    for column in sorted_df.columns:
        html = html.replace(
            f'<th>{column}</th>',
            f'<th><a href="#" onclick="sortBy(\'{column}\'); return false;">{column}</a></th>',
        )

    return html
|
32 |
+
|
33 |
+
def create_leaderboard():
    """Build the leaderboard page and return its ``gr.Blocks`` container.

    The page consists of a title, a row of placeholder links, an intro
    paragraph, a "Sort by" dropdown over the columns of ``df``, a stats line,
    and an HTML table rendered by ``update_leaderboard``. Changing the
    dropdown re-renders the table.

    Returns:
        gr.Blocks: The container holding the leaderboard components.
    """
    table_css = (
        "#leaderboard table { width: 100%; } "
        "#leaderboard th, #leaderboard td { padding: 8px; text-align: left; }"
    )

    # NOTE(review): app.py calls this inside a TabItem of another gr.Blocks;
    # confirm that css passed to a nested Blocks is actually applied.
    with gr.Blocks(css=table_css) as demo:
        gr.Markdown("# 🏆 Chris-Project Summarization Arena Leaderboard")

        with gr.Row():
            gr.Markdown("[Blog](placeholder) | [GitHub](placeholder) | [Paper](placeholder) | [Dataset](placeholder) | [Twitter](placeholder) | [Discord](placeholder)")

        gr.Markdown("Welcome to our open platform for evaluating LLM summarization capabilities. We use the DATASET_NAME_PLACEHOLDER dataset to generate summaries with MODEL_NAME_PLACEHOLDER. These summaries are then evaluated by STRONGER_MODEL_NAME_PLACEHOLDER using the METRIC1_PLACEHOLDER and METRIC2_PLACEHOLDER metrics")

        sort_by = gr.Dropdown(list(df.columns), label="Sort by", value="Rank")

        # The method count comes from the table itself so the stats line
        # cannot drift from the data; the question count is still a
        # hard-coded placeholder.
        gr.Markdown(f"**Performance**\n\n**methods**: {len(df)}, **questions**: 150")

        # Initial render uses the same column the dropdown starts on.
        leaderboard = gr.HTML(update_leaderboard("Rank"), elem_id="leaderboard")

        sort_by.change(update_leaderboard, inputs=[sort_by], outputs=[leaderboard])

        gr.Markdown("Code to recreate leaderboard tables and plots in this [notebook](https://colab.research.google.com/drive/1RAWb22-PFNI-X1gPVzc927jv7GOEcmaB). You can contribute your vote at [chat.lmsys.org](https://chat.lmsys.org)!")

    return demo
|