Spaces:
Sleeping
Sleeping
Patrick Haller
committed on
Commit
·
983ff7e
1
Parent(s):
51a9af1
Init leaderboard
Browse files- app.py +73 -0
- current_results.json +53 -0
- text.py +26 -0
app.py
ADDED
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import sys
|
2 |
+
import json
|
3 |
+
|
4 |
+
import gradio as gr
|
5 |
+
import pandas as pd
|
6 |
+
|
7 |
+
from text import CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT, INTRODUCTION_TEXT, TITLE_TEXT, TASK_DESCRIPTION
|
8 |
+
|
9 |
+
# with open("app.css") as f:
|
10 |
+
# css_code = f.read()
|
11 |
+
|
12 |
+
demo = gr.Blocks()

# Load the stored benchmark results: a JSON list with one object per model.
with open("current_results.json", encoding="utf-8") as f:
    result_list = json.load(f)

df = pd.DataFrame(result_list)


def _model_cell(row):
    """Return the model name, wrapped in an HTML link when the row has one."""
    # "link" is optional per record: rows without it hold NaN, and the column
    # is missing entirely when no record provides one, hence ``get``.
    link = row.get("link")
    if isinstance(link, str):
        return f"<a style='text-decoration: underline' href='{link}'>{row['Model']}</a>"
    return row["Model"]


df["Model"] = df.apply(_model_cell, axis=1)

# Keep only the displayed columns, in display order. ``copy`` yields an
# independent frame so the column assignments below do not raise pandas'
# SettingWithCopyWarning.
df = df[["Model", "instruction_only", "aoc_original", "aoc_leet", "euler_original", "euler_story"]].copy()

# Encode the flag numerically so it can be used as the primary sort key.
df["instruction_only"] = df["instruction_only"].map({True: 1, False: 0})

# Replace Column names
df.columns = ["Model", "Evaluation", "AOC Original",
              "AOC Leet", "Euler Original", "Euler Story"]

# NOTE(review): the emoji literals in this block look mis-encoded in this copy
# of the file - confirm against the repository before changing them.

# Row-wise mean over the four score columns (everything after "Evaluation").
average_scores = df.iloc[:, 2:].mean(axis=1).round(2)
df.insert(loc=2, column="β¬οΈ Average", value=average_scores)

# Group rows by evaluation type, best average first within each group.
df = df.sort_values(by=["Evaluation", "β¬οΈ Average"], ascending=[True, False])
df["Evaluation"] = df["Evaluation"].map({1: "πΆ", 0: "π©"})

with demo:
    gr.HTML(f"<h2 style='text-align: center'>{TITLE_TEXT}</h2>")
    # gr.HTML('<hr>')
    # The heading must be closed with </h3>; a second <h3> would open a new one.
    gr.HTML(f"<h3>{INTRODUCTION_TEXT}</h3>")
    gr.HTML('<hr style="border-top: 3px dotted #bbb" class="dotted">')

    gr.HTML("<h3>π Results</h3>")
    gr.components.Dataframe(
        value=df,
        # The first column ("Model") carries HTML links.
        datatype=["html"]

    )

    gr.HTML("<h3>Legend</h3>")
    gr.HTML("<p>πΆ: Evaluated only on the first part of each AoC day</p>")
    gr.HTML("<p>π©: Complete Evaluation</p>")

    # with gr.Row():
    #     with gr.Accordion("Task", open=True):
    #         with gr.Row():
    #             with gr.Column(scale=1):
    #                 gr.Image("assets/front.png")
    #             with gr.Column(scale=4):
    #                 gr.Markdown(TASK_DESCRIPTION)

    with gr.Row():
        with gr.Accordion("π Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

demo.launch()
|
current_results.json
ADDED
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"Model": "Mistral-7B-Instruct-v0.1",
|
4 |
+
"euler_original": 0.37,
|
5 |
+
"euler_story": 0.12,
|
6 |
+
"aoc_original": 3.0,
|
7 |
+
"aoc_leet": 3.0,
|
8 |
+
"instruction_only": true,
|
9 |
+
"link": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1"
|
10 |
+
},
|
11 |
+
{
|
12 |
+
"Model": "Mixtral-8x7B-Instruct-v0.1",
|
13 |
+
"euler_original": 2.86,
|
14 |
+
"euler_story": 2.23,
|
15 |
+
"aoc_original": 8.67,
|
16 |
+
"aoc_leet": 8.42,
|
17 |
+
"instruction_only": false,
|
18 |
+
"link": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1"
|
19 |
+
},
|
20 |
+
{
|
21 |
+
"Model": "chat_bison",
|
22 |
+
"euler_story": 0.62,
|
23 |
+
"euler_original": 2.44,
|
24 |
+
"aoc_leet": 13.78,
|
25 |
+
"aoc_original": 17.09,
|
26 |
+
"instruction_only": false
|
27 |
+
},
|
28 |
+
{
|
29 |
+
"Model": "codechat_bison",
|
30 |
+
"euler_story": 2.61,
|
31 |
+
"euler_original": 4.59,
|
32 |
+
"aoc_original": 21.17,
|
33 |
+
"aoc_leet": 17.6,
|
34 |
+
"instruction_only": false
|
35 |
+
},
|
36 |
+
{
|
37 |
+
"Model": "WizardCoder-Python-34B-V1.0",
|
38 |
+
"aoc_leet": 22.5,
|
39 |
+
"aoc_original": 24.0,
|
40 |
+
"euler_original": 2.61,
|
41 |
+
"euler_story": 2.48,
|
42 |
+
"instruction_only": true,
|
43 |
+
"link": "https://huggingface.co/WizardLM/WizardCoder-Python-34B-V1.0"
|
44 |
+
},
|
45 |
+
{
|
46 |
+
"Model": "gpt3.5",
|
47 |
+
"euler_original": 8.19,
|
48 |
+
"euler_story": 6.95,
|
49 |
+
"aoc_leet": 29.85,
|
50 |
+
"aoc_original": 50.0,
|
51 |
+
"instruction_only": false
|
52 |
+
}
|
53 |
+
]
|
text.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Static text shown by the leaderboard UI.

# Page heading.
TITLE_TEXT = "PECC - Problem Extraction and Coding Challenges Evaluation Benchmark"

# Introductory paragraph rendered below the heading.
# NOTE(review): the leading symbol looks like a mis-encoded emoji in this copy
# of the file - confirm against the repository.
INTRODUCTION_TEXT = (
    "π PECC: An extensive benchmark centered on code generation from "
    "narrative-embedded problem descriptions. Unlike prior benchmarks that "
    "evaluate code generation using specific instructions, our dataset "
    "requires models to comprehend, extract requirements, and produce the "
    "essential code for problem-solving. This approach necessitates "
    "syntactically accurate programs and demands reading comprehension "
    "skills to derive the desired solution."
)

# Markdown description of the benchmark task.
TASK_DESCRIPTION = (
    "## Task Description\n"
    "The task for the model is to generate directly executable python code.\n"
    "\n"
    "### Instruction\n"
    "The model is first prompted with a system prompt, which is a short "
    "description of the problem. The model is then asked to generate the "
    "python code that solves the problem.\n"
    "\n"
    "### Task\n"
    "The model receives the task itself.\n"
)

# Label of the citation text box.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite our paper"

# BibTeX entry offered for copying.
# NOTE(review): leading whitespace inside this literal cannot be recovered from
# this copy of the file - confirm against the repository.
CITATION_BUTTON_TEXT = r"""
@misc{pecc,
author = {Patrick Haller and Jonas Golde and Alan Akbik},
title = {PECC - Problem Extraction and Coding Challenges},
year = {2024},
publisher = {GitHub},
journal = {GitHub repository},
howpublished = {}
}
"""
|