"""Static configuration for the BabyLM 2024 leaderboard app.

Defines the evaluated tasks (text-only and multimodal tracks) and the
display strings (title, intro, how-it-works, submission instructions,
citation) rendered by the leaderboard UI.
"""

from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    """One leaderboard column: where to find a score and how to label it."""

    benchmark: str  # task_key in the results json file
    metric: str     # metric_key in the results json file
    col_name: str   # column name to display in the leaderboard


# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
    task0 = Task("blimp", "acc", "BLiMP")
    task1 = Task("blimp_supplement", "acc", "BLiMP Supplement")
    task2 = Task("glue", "acc", "(Super)GLUE")
    task3 = Task("ewok", "acc", "EWoK")


class TasksMultimodal(Enum):
    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
    task0 = Task("blimp", "acc", "BLiMP")
    task1 = Task("blimp_supplement", "acc", "BLiMP Supplement")
    task2 = Task("glue", "acc", "(Super)GLUE")
    task3 = Task("ewok", "acc", "EWoK")
    task4 = Task("vqa", "acc", "VQA")
    task5 = Task("winoground", "acc", "Winoground")
    task6 = Task("devbench", "acc", "DevBench")


NUM_FEWSHOT = 0  # Change with your few shot
# ---------------------------------------------------


# Your leaderboard name
TITLE = """

BabyLM 2024 Leaderboards

"""

# What does your leaderboard evaluate?
INTRODUCTION_TEXT = """
The leaderboards for each track of the 2024 BabyLM Challenge.
"""

# Which evaluations are you running? how can people reproduce what you have?
# NOTE: plain string — the previous f-string had no placeholders (ruff F541).
LLM_BENCHMARKS_TEXT = """
## How it works
This leaderboard accepts predictions files as input, and uploads the results to the leaderboard. The logic is the same as in the `score_predictions.py` script from the BabyLM 2024 evaluation pipeline repository.
"""

EVALUATION_QUEUE_TEXT = """
## Some good practices before submitting a model:

Make sure you can get scores from your prediction using the `score_predictions.py` script.
```bash
git clone https://github.com/babylm/evaluation-pipeline-2024/
cd evaluation-pipeline-2024
python score_predictions.py path/to/your/predictions.json.gz
```
If this step fails, follow the error messages to debug your model before submitting it. It's likely that either (i) some results are missing, or (ii) the results are incorrectly formatted.

Make sure your model has an open license! This is a leaderboard that is meant to advance research on language modeling, and we'd love for as many people as possible to know they can use your model!
"""

CITATION_BUTTON_LABEL = "If you would like to cite these results, please cite the 2024 BabyLM Findings paper, as well as the authors of the model(s) whose results you cite!"
CITATION_BUTTON_TEXT = r"""
"""