from dataclasses import dataclass


@dataclass
class Task:
    """Base record describing one task and how its metric is interpreted.

    Subclasses re-declare the fields with task-specific defaults, so the
    values can be read either from the class itself (``Arc.code``) or from
    an instance created without arguments (``Arc().code``).
    """

    # Short identifier string for the task.
    code: str
    # Human-readable task name.
    name: str
    # Name of the metric associated with the task.
    metric: str
    # True when a larger metric value is the better result.
    higher_is_better: bool = True
    # Few-shot count for the task (0 when none is configured).
    num_fewshot: int = 0
    # NOTE(review): presumably marks tasks scored on a private test set;
    # confirm against the code that consumes this flag.
    private_test: bool = False


@dataclass
class Lambada(Task):
    """LAMBADA-vi task; metric ``ppl``, where lower is better."""

    code: str = "lambada_vi"
    name: str = "LAMBADA-vi"
    metric: str = "ppl"
    higher_is_better: bool = False
    num_fewshot: int = 0
    private_test: bool = True


@dataclass
class Arc(Task):
    """ARC-vi task; metric ``acc_norm`` with 25 few-shot examples."""

    code: str = "arc_vi"
    name: str = "ARC-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 25
    private_test: bool = False


@dataclass
class HellaSwag(Task):
    """HellaSwag-vi task; metric ``acc_norm`` with 10 few-shot examples."""

    code: str = "hellaswag_vi"
    name: str = "HellaSwag-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 10
    private_test: bool = False


@dataclass
class MMLU(Task):
    """MMLU-vi task; metric ``acc_norm`` with 5 few-shot examples."""

    code: str = "mmlu_vi"
    name: str = "MMLU-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 5
    private_test: bool = False


@dataclass
class TruthfulQA(Task):
    """TruthfulQA-vi task; metric ``mc2`` with no few-shot examples."""

    code: str = "truthfulqa_vi"
    name: str = "TruthfulQA-vi"
    metric: str = "mc2"
    higher_is_better: bool = True
    num_fewshot: int = 0
    private_test: bool = False


@dataclass
class Grade12Exams(Task):
    """Grade 12 Exams task; metric ``acc_norm`` with 5 few-shot examples."""

    code: str = "grade_12_exams_vi"
    name: str = "Grade 12 Exams"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 5
    private_test: bool = False


@dataclass
class IWSLT2023_en_vi(Task):
    """IWSLT 2023 en-vi task; metric ``bleu`` with no few-shot examples."""

    code: str = "translation_vi"
    name: str = "IWSLT 2023 en-vi"
    metric: str = "bleu"
    higher_is_better: bool = True
    num_fewshot: int = 0
    private_test: bool = False


@dataclass
class WikipediaQA(Task):
    """GeneralKnowledgeQA-vi task; metric ``acc_norm``, 5 few-shot examples."""

    code: str = "wikipediaqa_vi"
    name: str = "GeneralKnowledgeQA-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 5
    private_test: bool = True


@dataclass
class Comprehension(Task):
    """ComprehensionQA-vi task; metric ``acc_norm``, no few-shot examples."""

    code: str = "comprehension_vi"
    name: str = "ComprehensionQA-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 0
    private_test: bool = True


# Task classes (not instances), in order: first the ones declared with
# private_test=False, then the ones declared with private_test=True.
# NOTE(review): Grade12Exams and IWSLT2023_en_vi are defined above but are
# not listed here; confirm that leaving them out is intentional.
TASKS = [Arc, HellaSwag, MMLU, TruthfulQA] + [Lambada, WikipediaQA, Comprehension]