kz209 committed on
Commit
0b41ab5
•
1 Parent(s): 899db21

create leaderboard

Browse files
Files changed (2) hide show
  1. app.py +4 -5
  2. pages/leaderboard.py +52 -0
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
 
3
  from pages.arena import create_arena
4
  from pages.summarization_example import create_summarization_interface
 
5
 
6
  def welcome_message():
7
  return """
@@ -25,11 +26,9 @@ with gr.Blocks() as demo:
25
  create_arena()
26
  with gr.TabItem("Summarization"):
27
  create_summarization_interface()
28
- # with gr.TabItem("Page 3"):
29
- # page3()
30
 
31
 
32
  if __name__ == "__main__":
33
- demo.launch(server_port=7860)
34
-
35
- #iface.launch() # launch the Gradio app
 
2
 
3
  from pages.arena import create_arena
4
  from pages.summarization_example import create_summarization_interface
5
+ from pages.leaderboard import create_leaderboard
6
 
7
  def welcome_message():
8
  return """
 
26
  create_arena()
27
  with gr.TabItem("Summarization"):
28
  create_summarization_interface()
29
+ with gr.TabItem("Leaderboard"):
30
+ create_leaderboard()
31
 
32
 
33
  if __name__ == "__main__":
34
+ demo.launch()
 
 
pages/leaderboard.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
# Placeholder sample data for the leaderboard. Each key is a column name and
# each list holds one value per method, in the same row order.
data = {
    'Rank': [1, 2, 3, 4, 5],
    'Methods': ['METHOD1_PLACEHOLDER', 'METHOD2_PLACEHOLDER', 'METHOD3_PLACEHOLDER', 'METHOD4_PLACEHOLDER', 'METHOD5_PLACEHOLDER'],
    'METRIC1_PLACEHOLDER Score': [1287, 1272, 1267, 1262, 1258],
    'METRIC2_PLACEHOLDER Score': [56905, 24913, 42981, 49828, 55567],
    'METRIC3_PLACEHOLDER Score': [3423, 3423, 2152, 4353, 2342],
    'Authors': ['AUTHOR1_PLACEHOLDER', 'AUTHOR2_PLACEHOLDER', 'AUTHOR3_PLACEHOLDER', 'AUTHOR4_PLACEHOLDER', 'AUTHOR5_PLACEHOLDER'],
}

df = pd.DataFrame(data)


def update_leaderboard(sort_by):
    """Render the leaderboard as an HTML table sorted by ``sort_by``.

    The module-level ``df`` is never modified; sorting works on a copy.

    Args:
        sort_by: Name of the column to sort on. ``None`` (for example an
            empty dropdown selection) falls back to ``'Rank'``.

    Returns:
        The table as an HTML string. Each header cell is wrapped in a link
        whose ``onclick`` calls ``sortBy('<column>')``.

    Raises:
        KeyError: If ``sort_by`` does not name a column of ``df``.
    """
    if sort_by is None:
        sort_by = 'Rank'

    # Rank 1 is the best entry, so the 'Rank' column must sort ascending.
    # Sorting it descending and then renumbering would present the worst
    # entry as rank 1. Every other column keeps the descending order.
    ascending = sort_by == 'Rank'

    # mergesort is stable, so rows with equal values keep their original
    # relative order and the output is deterministic.
    sorted_df = df.sort_values(by=sort_by, ascending=ascending, kind='mergesort')

    # Renumber the ranks so they follow the displayed order.
    sorted_df['Rank'] = range(1, len(sorted_df) + 1)

    # NOTE(review): escape=False emits cell values as raw HTML. That is fine
    # for the placeholder data, but confirm it is intended before loading
    # user-supplied method or author names.
    html = sorted_df.to_html(index=False, escape=False)

    # Wrap every header cell in a sorting link.
    # NOTE(review): no `sortBy` JavaScript function is defined in this file;
    # confirm it is provided by the page, otherwise these links do nothing.
    for column in sorted_df.columns:
        html = html.replace(
            f'<th>{column}</th>',
            f'<th><a href="#" onclick="sortBy(\'{column}\'); return false;">{column}</a></th>',
        )

    return html
32
+
33
def create_leaderboard():
    """Build the leaderboard page and return its ``gr.Blocks`` container.

    The page has a title, a row of placeholder links, an introduction, a
    "Sort by" dropdown, a short statistics line, and the leaderboard table.
    Changing the dropdown re-renders the table through ``update_leaderboard``.

    Returns:
        The ``gr.Blocks`` instance holding the leaderboard components.
    """
    # NOTE(review): when this function is called inside another gr.Blocks
    # context, confirm that the css passed here is still applied.
    table_css = (
        "#leaderboard table { width: 100%; } "
        "#leaderboard th, #leaderboard td { padding: 8px; text-align: left; }"
    )

    with gr.Blocks(css=table_css) as demo:
        gr.Markdown("# 🏆 Chris-Project Summarization Arena Leaderboard")

        with gr.Row():
            gr.Markdown("[Blog](placeholder) | [GitHub](placeholder) | [Paper](placeholder) | [Dataset](placeholder) | [Twitter](placeholder) | [Discord](placeholder)")

        gr.Markdown("Welcome to our open platform for evaluating LLM summarization capabilities. We use the DATASET_NAME_PLACEHOLDER dataset to generate summaries with MODEL_NAME_PLACEHOLDER. These summaries are then evaluated by STRONGER_MODEL_NAME_PLACEHOLDER using the METRIC1_PLACEHOLDER and METRIC2_PLACEHOLDER metrics")

        sort_by = gr.Dropdown(list(df.columns), label="Sort by", value="Rank")

        # Take the method count from the table so the statistics line cannot
        # drift from the data being displayed.
        gr.Markdown(f"**Performance**\n\n**methods**: {len(df)}, **questions**: 150")

        # Initial render uses the same column the dropdown starts on.
        leaderboard = gr.HTML(update_leaderboard("Rank"), elem_id="leaderboard")

        sort_by.change(update_leaderboard, inputs=[sort_by], outputs=[leaderboard])

        gr.Markdown("Code to recreate leaderboard tables and plots in this [notebook](https://colab.research.google.com/drive/1RAWb22-PFNI-X1gPVzc927jv7GOEcmaB). You can contribute your vote at [chat.lmsys.org](https://chat.lmsys.org)!")

    return demo