File size: 1,895 Bytes
b0314f9
 
 
 
 
 
 
 
 
f8b127b
b0314f9
 
 
 
0cf0987
b0314f9
 
 
f8b127b
b0314f9
 
 
 
0cf0987
b0314f9
 
 
f8b127b
b0314f9
 
 
 
0cf0987
b0314f9
 
 
f8b127b
b0314f9
 
 
 
0cf0987
b0314f9
 
 
f8b127b
b0314f9
 
 
 
0cf0987
b0314f9
 
 
f8b127b
b0314f9
 
 
 
 
 
 
 
f8b127b
b0314f9
 
 
 
 
 
 
 
f8b127b
b0314f9
 
f8b127b
 
0cf0987
f8b127b
 
 
 
 
 
 
 
0cf0987
f8b127b
f86137f
 
f8b127b
f86137f
 
0cf0987
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
from dataclasses import dataclass

@dataclass
class Task:
    """Static description of one evaluation task.

    Attributes:
        code: Short machine identifier of the task (subclasses in this file
            use values such as ``"arc_vi"``).
        name: Human-readable task name (e.g. ``"ARC-vi"``).
        metric: Name of the reported metric (e.g. ``"acc_norm"``, ``"ppl"``).
        higher_is_better: Whether a larger metric value is the better one.
        num_fewshot: Few-shot setting for the task; presumably the number of
            in-context examples -- confirm against the evaluation harness.
        private_test: Flag set on some tasks; presumably marks tasks whose
            test data is not public -- confirm against the callers.
    """

    # Required fields: no default, so they must be passed to ``Task(...)``.
    code: str
    name: str
    metric: str

    # Optional fields with their defaults.
    higher_is_better: bool = True
    num_fewshot: int = 0
    private_test: bool = False


@dataclass
class Lambada(Task):
    """Settings for the ``lambada_vi`` task (metric ``ppl``, 0-shot).

    Declared as a dataclass with every field annotated so the values below
    become the ``__init__`` defaults: ``Lambada()`` can be constructed
    without arguments and its instances carry the same values as the
    class-level attributes. Class-level access (``Lambada.code`` etc.)
    keeps working as before.
    """

    code: str = "lambada_vi"
    name: str = "LAMBADA-vi"
    metric: str = "ppl"
    # The only task in this file where a lower metric value is the better one.
    higher_is_better: bool = False
    num_fewshot: int = 0
    private_test: bool = True


@dataclass
class Arc(Task):
    """Settings for the ``arc_vi`` task (metric ``acc_norm``, 25-shot).

    Declared as a dataclass with every field annotated so the values below
    become the ``__init__`` defaults: ``Arc()`` can be constructed without
    arguments and its instances carry the same values as the class-level
    attributes. Class-level access (``Arc.code`` etc.) keeps working.
    """

    code: str = "arc_vi"
    name: str = "ARC-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 25
    private_test: bool = False


@dataclass
class HellaSwag(Task):
    """Settings for the ``hellaswag_vi`` task (metric ``acc_norm``, 10-shot).

    Declared as a dataclass with every field annotated so the values below
    become the ``__init__`` defaults: ``HellaSwag()`` can be constructed
    without arguments and its instances carry the same values as the
    class-level attributes. Class-level access (``HellaSwag.code`` etc.)
    keeps working.
    """

    code: str = "hellaswag_vi"
    name: str = "HellaSwag-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 10
    private_test: bool = False


@dataclass
class MMLU(Task):
    """Settings for the ``mmlu_vi`` task (metric ``acc_norm``, 5-shot).

    Declared as a dataclass with every field annotated so the values below
    become the ``__init__`` defaults: ``MMLU()`` can be constructed without
    arguments and its instances carry the same values as the class-level
    attributes. Class-level access (``MMLU.code`` etc.) keeps working.
    """

    code: str = "mmlu_vi"
    name: str = "MMLU-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 5
    private_test: bool = False


@dataclass
class TruthfulQA(Task):
    """Settings for the ``truthfulqa_vi`` task (metric ``mc2``, 0-shot).

    Declared as a dataclass with every field annotated so the values below
    become the ``__init__`` defaults: ``TruthfulQA()`` can be constructed
    without arguments. Class-level access (``TruthfulQA.code`` etc.) keeps
    working.
    """

    code: str = "truthfulqa_vi"
    name: str = "TruthfulQA-vi"
    metric: str = "mc2"
    higher_is_better: bool = True
    num_fewshot: int = 0
    private_test: bool = False


@dataclass
class Grade12Exams(Task):
    """Settings for the ``grade_12_exams_vi`` task (metric ``acc_norm``, 5-shot).

    Declared as a dataclass with every field annotated so the values below
    become the ``__init__`` defaults: ``Grade12Exams()`` can be constructed
    without arguments and its instances carry the same values as the
    class-level attributes. Class-level access (``Grade12Exams.code`` etc.)
    keeps working.
    """

    code: str = "grade_12_exams_vi"
    name: str = "Grade 12 Exams"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 5
    private_test: bool = False


@dataclass
class IWSLT2023_en_vi(Task):
    """Settings for the ``translation_vi`` task (metric ``bleu``, 0-shot).

    Declared as a dataclass with every field annotated so the values below
    become the ``__init__`` defaults: ``IWSLT2023_en_vi()`` can be
    constructed without arguments. Class-level access
    (``IWSLT2023_en_vi.code`` etc.) keeps working.
    """

    code: str = "translation_vi"
    name: str = "IWSLT 2023 en-vi"
    metric: str = "bleu"
    higher_is_better: bool = True
    num_fewshot: int = 0
    private_test: bool = False


@dataclass
class WikipediaQA(Task):
    """Settings for the ``wikipediaqa_vi`` task (metric ``acc_norm``, 5-shot).

    Note that the display name is ``"GeneralKnowledgeQA-vi"`` while the code
    is ``"wikipediaqa_vi"``.

    Declared as a dataclass with every field annotated so the values below
    become the ``__init__`` defaults: ``WikipediaQA()`` can be constructed
    without arguments and its instances carry the same values as the
    class-level attributes. Class-level access (``WikipediaQA.code`` etc.)
    keeps working.
    """

    code: str = "wikipediaqa_vi"
    name: str = "GeneralKnowledgeQA-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 5
    private_test: bool = True


@dataclass
class Comprehension(Task):
    """Settings for the ``comprehension_vi`` task (metric ``acc_norm``, 0-shot).

    Declared as a dataclass with every field annotated so the values below
    become the ``__init__`` defaults: ``Comprehension()`` can be constructed
    without arguments and its instances carry the same values as the
    class-level attributes. Class-level access (``Comprehension.code`` etc.)
    keeps working.
    """

    code: str = "comprehension_vi"
    name: str = "ComprehensionQA-vi"
    metric: str = "acc_norm"
    higher_is_better: bool = True
    num_fewshot: int = 0
    private_test: bool = True


# Task classes exposed through TASKS, in order. Grade12Exams and
# IWSLT2023_en_vi are defined in this module but are not part of the list.
TASKS = [
    # Tasks declared with private_test = False.
    Arc,
    HellaSwag,
    MMLU,
    TruthfulQA,
    # Tasks declared with private_test = True.
    Lambada,
    WikipediaQA,
    Comprehension,
]