Aaron Mueller committed • commit a03986e • 1 parent: 61202b8
update citation/submission text

Files changed:
- app.py (+1, -79)
- src/about.py (+14, -5)
app.py CHANGED

@@ -115,85 +115,7 @@ with demo:
         with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=4):
             gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")
 
-
-    with gr.Column():
-        with gr.Row():
-            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")
-
-        with gr.Column():
-            with gr.Accordion(
-                f"✅ Finished Evaluations ({len(finished_eval_queue_df)})",
-                open=False,
-            ):
-                with gr.Row():
-                    finished_eval_table = gr.components.Dataframe(
-                        value=finished_eval_queue_df,
-                        headers=EVAL_COLS,
-                        datatype=EVAL_TYPES,
-                        row_count=5,
-                    )
-            with gr.Accordion(
-                f"🔄 Running Evaluation Queue ({len(running_eval_queue_df)})",
-                open=False,
-            ):
-                with gr.Row():
-                    running_eval_table = gr.components.Dataframe(
-                        value=running_eval_queue_df,
-                        headers=EVAL_COLS,
-                        datatype=EVAL_TYPES,
-                        row_count=5,
-                    )
-
-            with gr.Accordion(
-                f"⏳ Pending Evaluation Queue ({len(pending_eval_queue_df)})",
-                open=False,
-            ):
-                with gr.Row():
-                    pending_eval_table = gr.components.Dataframe(
-                        value=pending_eval_queue_df,
-                        headers=EVAL_COLS,
-                        datatype=EVAL_TYPES,
-                        row_count=5,
-                    )
-    with gr.Row():
-        gr.Markdown("# ✉️✨ Submit your predictions here!", elem_classes="markdown-text")
-
-    with gr.Row():
-        with gr.Column():
-            model_name_textbox = gr.Textbox(label="Model name. This will be displayed on the leaderboard.")
-            model_id_textbox = gr.Textbox(label="Huggingface model ID (if applicable). This looks like `owner/repo_id`, not like a URL.", placeholder="")
-            revision_name_textbox = gr.Textbox(label="Model revision commit", placeholder="main")
-            track_name = gr.Dropdown(
-                choices = ["strict", "strict-small", "multimodal"],
-                label = "Track",
-                multiselect=False,
-                value=None,
-                interactive=True
-            )
-
-        predictions_data = gr.State()
-        upload_button = gr.UploadButton(label="Upload predictions", file_types=[".json", ".gz"], file_count="single")
-        upload_button.upload(
-            fn=process_json,
-            inputs=upload_button,
-            outputs=predictions_data,
-            api_name="upload_json"
-        )
-
-        submit_button = gr.Button("Submit Eval")
-        submission_result = gr.Markdown()
-        submit_button.click(
-            add_new_eval,
-            [
-                model_name_textbox,
-                model_id_textbox,
-                revision_name_textbox,
-                track_name,
-                predictions_data,
-            ],
-            submission_result,
-        )
-
+
     with gr.Row():
         with gr.Accordion("📙 Citation", open=False):
             citation_button = gr.Textbox(
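For context on the removed submission UI: `process_json` is the handler wired to `gr.UploadButton`, and its return value feeds the `predictions_data` state passed to `add_new_eval`. Its implementation is not part of this diff; the sketch below is a minimal, assumed version that only parses the uploaded `.json`/`.json.gz` predictions file.

```python
import gzip
import json


def process_json(upload):
    """Hypothetical upload handler: parse a .json or .json.gz predictions file.

    Gradio's UploadButton hands the callback either a temp-file object with a
    .name attribute or a plain path string, depending on the Gradio version;
    both cases are handled here.
    """
    path = upload if isinstance(upload, str) else upload.name
    opener = gzip.open if path.endswith(".gz") else open
    with opener(path, "rt", encoding="utf-8") as f:
        # The parsed dict is what gets stored in the gr.State `predictions_data`.
        return json.load(f)
```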
src/about.py CHANGED

@@ -41,12 +41,11 @@ The leaderboards for each track of the 2024 BabyLM Challenge.
 
 # Which evaluations are you running? how can people reproduce what you have?
 LLM_BENCHMARKS_TEXT = f"""
-
-This leaderboard accepts predictions files as input, and uploads the results to the leaderboard. The logic is the same as in the `score_predictions.py` script from the BabyLM 2024 evaluation pipeline repository.
+This leaderboard displays scores from the 2024 BabyLM Challenge. Each track has its own tab.
 """
 
 EVALUATION_QUEUE_TEXT = """
-## Some good practices before
+## Some good practices before requesting a model upload:
 
 Make sure you can get scores from your prediction using the `score_predictions.py` script.
 ```bash
@@ -56,10 +55,20 @@ python score_predictions.py path/to/your/predictions.json.gz
 ```
 If this step fails, follow the error messages to debug your model before submitting it. It's likely that either (i) some results are missing, or (ii) the results are incorrectly formatted.
 
-Make sure your model has an open license! This is a leaderboard that is meant to advance research on language modeling, and we'd love for as many people as possible to know they can use your model
+Make sure your model has an open license! This is a leaderboard that is meant to advance research on language modeling, and we'd love for as many people as possible to know they can use your model.
+
+Once these steps have been followed, get in touch with the organizers with your predictions file(s), and the scores you've obtained.
+We'll verify that we can match your scores, and then upload to the leaderboard. Optionally, you can give us your preferred model display name for the leaderboard, and a link to your model on HuggingFace.
 """
 
 CITATION_BUTTON_LABEL = "If you would like to cite these results, please cite the 2024 BabyLM Findings paper, as well as the authors of the model(s) whose results you cite!"
 CITATION_BUTTON_TEXT = r"""
-
+@article{hu2024findingssecondbabylmchallenge,
+      title={Findings of the Second BabyLM Challenge: Sample-Efficient Pretraining on Developmentally Plausible Corpora},
+      author={Michael Y. Hu and Aaron Mueller and Candace Ross and Adina Williams and Tal Linzen and Chengxu Zhuang and Ryan Cotterell and Leshem Choshen and Alex Warstadt and Ethan Gotlieb Wilcox},
+      year={2024},
+      journal={Computing Research Repository},
+      volume={arXiv:2412.05149},
+      url={https://arxiv.org/abs/2412.05149},
+}
 """
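The example command in `EVALUATION_QUEUE_TEXT` scores a gzipped predictions file. If your predictions are in a plain JSON file, a small sketch for producing the `.json.gz` used in that command follows (the `predictions.json` path is hypothetical):

```python
import gzip
import shutil

# Compress predictions.json (hypothetical filename) into the .json.gz form
# used by the example `score_predictions.py` command above.
with open("predictions.json", "rb") as src:
    with gzip.open("predictions.json.gz", "wb") as dst:
        shutil.copyfileobj(src, dst)
```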