# Spaces: Runtime error  (page-status banner captured along with this file; not part of the module)
from dataclasses import dataclass | |
from enum import Enum | |
@dataclass
class Task:
    """Describe one evaluation task shown as a leaderboard column.

    The ``@dataclass`` decorator generates the positional constructor
    ``Task(benchmark, metric, col_name)`` that the task enums in this file
    rely on; without it, instantiating ``Task`` with arguments raises
    ``TypeError``.
    """

    # Task key in the results JSON file.
    benchmark: str
    # Metric key in the results JSON file.
    metric: str
    # Name to display in the leaderboard.
    col_name: str
# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    """Enumerate the evaluation tasks of the base task list.

    Each member's value is a ``Task`` built positionally from the task key,
    the metric key, and the display name.
    """

    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
    task0 = Task("blimp", "acc", "BLiMP")
    task1 = Task("blimp_supplement", "acc", "BLiMP Supplement")
    task2 = Task("glue", "acc", "(Super)GLUE")
    task3 = Task("ewok", "acc", "EWoK")
class TasksMultimodal(Enum):
    """Enumerate the evaluation tasks of the multimodal task list.

    The first four members repeat the BLiMP, BLiMP Supplement, (Super)GLUE
    and EWoK entries; the remaining three add VQA, Winoground and DevBench.
    Each value is a ``Task`` built positionally from the task key, the
    metric key, and the display name.
    """

    task0 = Task("blimp", "acc", "BLiMP")
    task1 = Task("blimp_supplement", "acc", "BLiMP Supplement")
    task2 = Task("glue", "acc", "(Super)GLUE")
    task3 = Task("ewok", "acc", "EWoK")
    task4 = Task("vqa", "acc", "VQA")
    task5 = Task("winoground", "acc", "Winoground")
    task6 = Task("devbench", "acc", "DevBench")
NUM_FEWSHOT = 0  # Change with your few shot
# ---------------------------------------------------

# Your leaderboard name (an HTML heading string)
TITLE = """<h1 align="center" id="space-title">BabyLM 2024 Leaderboards</h1>"""

# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
The leaderboards for each track of the 2024 BabyLM Challenge.
"""
# Which evaluations are you running? how can people reproduce what you have?
# Plain (non-f) string: the text contains no placeholders to interpolate.
LLM_BENCHMARKS_TEXT = """
## How it works
This leaderboard accepts predictions files as input, and uploads the results to the leaderboard. The logic is the same as in the `score_predictions.py` script from the BabyLM 2024 evaluation pipeline repository.
"""
# Markdown checklist for submitters.
# Steps are numbered consecutively (1-3); the list previously jumped from 1) to 3).
EVALUATION_QUEUE_TEXT = """
## Some good practices before submitting a model
### 1) Make sure you can get scores from your prediction using the `score_predictions.py` script.
```bash
git clone https://github.com/babylm/evaluation-pipeline-2024/
cd evaluation-pipeline-2024
python score_predictions.py path/to/your/predictions.json.gz
```
If this step fails, follow the error messages to debug your model before submitting it. It's likely that either (i) some results are missing, or (ii) the results are incorrectly formatted.
### 2) Make sure your model has an open license!
This is a leaderboard that is meant to advance research on language modeling, and we'd love for as many people as possible to know they can use your model!
### 3) Fill up your model card
When we add extra information about models to the leaderboard, it will be automatically taken from the model card.
"""
# Label displayed with the citation text below.
CITATION_BUTTON_LABEL = (
    "If you would like to cite these results, please cite the 2024 BabyLM Findings paper, "
    "as well as the authors of the model(s) whose results you cite!"
)
# TODO(review): the label asks readers to cite the 2024 BabyLM Findings paper,
# but no citation entry is provided here yet; the value is a single newline.
CITATION_BUTTON_TEXT = r"""
"""