Spaces:
Runtime error
Runtime error
File size: 1,895 Bytes
b0314f9 f8b127b b0314f9 0cf0987 b0314f9 f8b127b b0314f9 0cf0987 b0314f9 f8b127b b0314f9 0cf0987 b0314f9 f8b127b b0314f9 0cf0987 b0314f9 f8b127b b0314f9 0cf0987 b0314f9 f8b127b b0314f9 f8b127b b0314f9 f8b127b b0314f9 f8b127b 0cf0987 f8b127b 0cf0987 f8b127b f86137f f8b127b f86137f 0cf0987 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
from dataclasses import dataclass
@dataclass
class Task:
    """Base record describing one evaluation task.

    Attributes:
        code: Task identifier string (required, no default).
        name: Task name string (required, no default).
        metric: Name of the metric reported for the task, e.g. "acc_norm"
            (required, no default).
        higher_is_better: Presumably whether larger metric values rank
            better; defaults to True.
        num_fewshot: Presumably the number of few-shot examples used;
            defaults to 0.
        private_test: Boolean flag, defaults to False. Its exact meaning is
            decided by consumers that are not visible in this file.
    """

    code: str
    name: str
    metric: str
    higher_is_better: bool = True
    num_fewshot: int = 0
    private_test: bool = False
class Lambada(Task):
    """Settings for the LAMBADA-vi task.

    The values are plain class attributes named after the fields declared on
    ``Task``. This class is not itself decorated with ``@dataclass``, so the
    constructor inherited from ``Task`` is unchanged; read the settings from
    the class, e.g. ``Lambada.metric``.
    """

    code = "lambada_vi"
    name = "LAMBADA-vi"
    metric = "ppl"
    higher_is_better = False  # "ppl" is ranked lower-is-better
    num_fewshot = 0
    private_test: bool = True
class Arc(Task):
    """Settings for the ARC-vi task.

    The values are plain class attributes named after the fields declared on
    ``Task``. This class is not itself decorated with ``@dataclass``, so the
    constructor inherited from ``Task`` is unchanged; read the settings from
    the class, e.g. ``Arc.metric``.
    """

    code = "arc_vi"
    name = "ARC-vi"
    metric = "acc_norm"
    higher_is_better = True
    num_fewshot = 25
    private_test: bool = False
class HellaSwag(Task):
    """Settings for the HellaSwag-vi task.

    The values are plain class attributes named after the fields declared on
    ``Task``. This class is not itself decorated with ``@dataclass``, so the
    constructor inherited from ``Task`` is unchanged; read the settings from
    the class, e.g. ``HellaSwag.metric``.
    """

    code = "hellaswag_vi"
    name = "HellaSwag-vi"
    metric = "acc_norm"
    higher_is_better = True
    num_fewshot = 10
    private_test: bool = False
class MMLU(Task):
    """Settings for the MMLU-vi task.

    The values are plain class attributes named after the fields declared on
    ``Task``. This class is not itself decorated with ``@dataclass``, so the
    constructor inherited from ``Task`` is unchanged; read the settings from
    the class, e.g. ``MMLU.metric``.
    """

    code = "mmlu_vi"
    name = "MMLU-vi"
    metric = "acc_norm"
    higher_is_better = True
    num_fewshot = 5
    private_test: bool = False
class TruthfulQA(Task):
    """Settings for the TruthfulQA-vi task.

    The values are plain class attributes named after the fields declared on
    ``Task``. This class is not itself decorated with ``@dataclass``, so the
    constructor inherited from ``Task`` is unchanged; read the settings from
    the class, e.g. ``TruthfulQA.metric``.
    """

    code = "truthfulqa_vi"
    name = "TruthfulQA-vi"
    metric = "mc2"
    higher_is_better = True
    num_fewshot = 0
    private_test: bool = False
class Grade12Exams(Task):
    """Settings for the "Grade 12 Exams" task.

    The values are plain class attributes named after the fields declared on
    ``Task``. This class is not itself decorated with ``@dataclass``, so the
    constructor inherited from ``Task`` is unchanged; read the settings from
    the class, e.g. ``Grade12Exams.metric``.
    """

    code = "grade_12_exams_vi"
    name = "Grade 12 Exams"
    metric = "acc_norm"
    higher_is_better = True
    num_fewshot = 5
    private_test: bool = False
class IWSLT2023_en_vi(Task):
    """Settings for the "IWSLT 2023 en-vi" task.

    The values are plain class attributes named after the fields declared on
    ``Task``. This class is not itself decorated with ``@dataclass``, so the
    constructor inherited from ``Task`` is unchanged; read the settings from
    the class, e.g. ``IWSLT2023_en_vi.metric``.
    """

    code = "translation_vi"
    name = "IWSLT 2023 en-vi"
    metric = "bleu"
    higher_is_better = True
    num_fewshot = 0
    private_test: bool = False
class WikipediaQA(Task):
    """Settings for the task displayed as "GeneralKnowledgeQA-vi".

    The values are plain class attributes named after the fields declared on
    ``Task``. This class is not itself decorated with ``@dataclass``, so the
    constructor inherited from ``Task`` is unchanged; read the settings from
    the class, e.g. ``WikipediaQA.metric``.
    """

    code = "wikipediaqa_vi"
    # The display name intentionally differs from the class name and code.
    name = "GeneralKnowledgeQA-vi"
    metric = "acc_norm"
    higher_is_better = True
    num_fewshot = 5
    private_test: bool = True
class Comprehension(Task):
    """Settings for the ComprehensionQA-vi task.

    The values are plain class attributes named after the fields declared on
    ``Task``. This class is not itself decorated with ``@dataclass``, so the
    constructor inherited from ``Task`` is unchanged; read the settings from
    the class, e.g. ``Comprehension.metric``.
    """

    code = "comprehension_vi"
    name = "ComprehensionQA-vi"
    metric = "acc_norm"
    higher_is_better = True
    num_fewshot = 0
    private_test: bool = True
# Task classes (not instances), in listing order. Every class in the first
# group sets private_test = False; every class in the second group sets
# private_test = True. Grade12Exams and IWSLT2023_en_vi are defined in this
# file but are not part of this list.
TASKS = [Arc, HellaSwag, MMLU, TruthfulQA] + [Lambada, WikipediaQA, Comprehension]