hieunguyen1053's picture
update XNLI
f86137f
raw
history blame
1.48 kB
from dataclasses import dataclass
@dataclass
class Task:
    """Description of one evaluation task and the metric reported for it.

    ``code``, ``name`` and ``metric`` have no defaults and must be supplied
    when an instance is constructed; the other two fields are optional.
    The subclasses in this file override the fields as plain class
    attributes and are read as classes rather than instantiated.
    """

    # Short machine identifier, e.g. "lambada_vi".
    code: str
    # Human-readable name, e.g. "LAMBADA".
    name: str
    # Name of the metric reported for the task, e.g. "ppl" or "acc_norm".
    metric: str
    # True when a larger metric value means a better result.
    higher_is_better: bool = True
    # Few-shot example count (presumably examples placed in the prompt -- confirm
    # against the evaluation harness); 0 means zero-shot.
    num_fewshot: int = 0
class Lambada(Task):
    """Settings for the LAMBADA task (code ``lambada_vi``).

    The values are plain class attributes, so they are readable from the
    class itself without creating an instance.
    """

    code = "lambada_vi"
    name = "LAMBADA"
    metric = "ppl"  # presumably perplexity -- confirm against the harness
    higher_is_better = False  # the only task in this file where lower wins
    num_fewshot = 0
class Arc(Task):
    """Settings for the ARC task (code ``arc_vi``).

    The values are plain class attributes, so they are readable from the
    class itself without creating an instance.
    """

    code = "arc_vi"
    name = "ARC"
    metric = "acc_norm"
    higher_is_better = True
    num_fewshot = 25
class HellaSwag(Task):
    """Settings for the HellaSwag task (code ``hellaswag_vi``).

    The values are plain class attributes, so they are readable from the
    class itself without creating an instance.
    """

    code = "hellaswag_vi"
    name = "HellaSwag"
    metric = "acc_norm"
    higher_is_better = True
    num_fewshot = 10
class MMLU(Task):
    """Settings for the MMLU task (code ``mmlu_vi``).

    The values are plain class attributes, so they are readable from the
    class itself without creating an instance.
    """

    code = "mmlu_vi"
    name = "MMLU"
    metric = "acc_norm"
    higher_is_better = True
    num_fewshot = 5
class TruthfulQA(Task):
    """Settings for the TruthfulQA task (code ``truthfulqa_vi``).

    The values are plain class attributes, so they are readable from the
    class itself without creating an instance.
    """

    code = "truthfulqa_vi"
    name = "TruthfulQA"
    metric = "mc2"
    higher_is_better = True
    num_fewshot = 0
class Grade12Exams(Task):
    """Settings for the Grade 12 Exams task (code ``grade_12_exams_vi``).

    The values are plain class attributes, so they are readable from the
    class itself without creating an instance.
    """

    code = "grade_12_exams_vi"
    name = "Grade 12 Exams"
    metric = "acc_norm"
    higher_is_better = True
    num_fewshot = 5
class IWSLT2023_en_vi(Task):
    """Settings for the IWSLT 2023 en-vi task (code ``translation_vi``).

    The values are plain class attributes, so they are readable from the
    class itself without creating an instance.
    """

    # Unlike the other tasks, the code does not echo the class name.
    code = "translation_vi"
    name = "IWSLT 2023 en-vi"
    metric = "bleu"
    higher_is_better = True
    num_fewshot = 0
class XNLI(Task):
    """Settings for the XNLI task (code ``xnli_vi``).

    The values are plain class attributes, so they are readable from the
    class itself without creating an instance.
    """

    code = "xnli_vi"
    name = "XNLI"
    metric = "acc"
    higher_is_better = True
    num_fewshot = 0
# Every task class defined above; the two lookups below follow this order.
TASKS = [
    Lambada,
    Arc,
    HellaSwag,
    MMLU,
    TruthfulQA,
    Grade12Exams,
    IWSLT2023_en_vi,
    XNLI,
]
# Task codes, read straight off the classes (no instances are created).
TASK_CODES = [benchmark.code for benchmark in TASKS]
# Maps each task code to the name of the metric reported for it.
TASK_TO_METRIC = {benchmark.code: benchmark.metric for benchmark in TASKS}