mihaimasala committed on
Commit
a82d63a
·
verified ·
1 Parent(s): 1bae7d1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +23 -23
README.md CHANGED
@@ -12,7 +12,7 @@ model-index:
12
  name: Romanian_Academic_Benchmarks
13
  type: Romanian_Academic_Benchmarks
14
  metrics:
15
- - name: Average
16
  type: accuracy
17
  value: 38.033944444444444
18
  - task:
@@ -21,7 +21,7 @@ model-index:
21
  name: OpenLLM-Ro/ro_arc_challenge
22
  type: OpenLLM-Ro/ro_arc_challenge
23
  metrics:
24
- - name: Average
25
  type: accuracy
26
  value: 37.945
27
  - task:
@@ -30,7 +30,7 @@ model-index:
30
  name: OpenLLM-Ro/ro_mmlu
31
  type: OpenLLM-Ro/ro_mmlu
32
  metrics:
33
- - name: Average
34
  type: accuracy
35
  value: 27.2175
36
  - task:
@@ -39,7 +39,7 @@ model-index:
39
  name: OpenLLM-Ro/ro_winogrande
40
  type: OpenLLM-Ro/ro_winogrande
41
  metrics:
42
- - name: Average
43
  type: accuracy
44
  value: 59.2925
45
  - task:
@@ -48,7 +48,7 @@ model-index:
48
  name: OpenLLM-Ro/ro_hellaswag
49
  type: OpenLLM-Ro/ro_hellaswag
50
  metrics:
51
- - name: Average
52
  type: accuracy
53
  value: 57.222
54
  - task:
@@ -57,7 +57,7 @@ model-index:
57
  name: OpenLLM-Ro/ro_gsm8k
58
  type: OpenLLM-Ro/ro_gsm8k
59
  metrics:
60
- - name: Average
61
  type: accuracy
62
  value: 2.526666666666667
63
  - task:
@@ -66,7 +66,7 @@ model-index:
66
  name: OpenLLM-Ro/ro_truthfulqa
67
  type: OpenLLM-Ro/ro_truthfulqa
68
  metrics:
69
- - name: Average
70
  type: accuracy
71
  value: 44.0
72
  - task:
@@ -75,7 +75,7 @@ model-index:
75
  name: LaRoSeDa_binary
76
  type: LaRoSeDa_binary
77
  metrics:
78
- - name: Average
79
  type: macro-f1
80
  value: 83.245
81
  - task:
@@ -84,7 +84,7 @@ model-index:
84
  name: LaRoSeDa_multiclass
85
  type: LaRoSeDa_multiclass
86
  metrics:
87
- - name: Average
88
  type: macro-f1
89
  value: 61.040000000000006
90
  - task:
@@ -93,7 +93,7 @@ model-index:
93
  name: LaRoSeDa_binary_finetuned
94
  type: LaRoSeDa_binary_finetuned
95
  metrics:
96
- - name: Average
97
  type: macro-f1
98
  value: 98.9666527738873
99
  - task:
@@ -102,7 +102,7 @@ model-index:
102
  name: LaRoSeDa_multiclass_finetuned
103
  type: LaRoSeDa_multiclass_finetuned
104
  metrics:
105
- - name: Average
106
  type: macro-f1
107
  value: 87.7198050161592
108
  - task:
@@ -111,7 +111,7 @@ model-index:
111
  name: WMT_EN-RO
112
  type: WMT_EN-RO
113
  metrics:
114
- - name: Average
115
  type: bleu
116
  value: 10.01
117
  - task:
@@ -120,7 +120,7 @@ model-index:
120
  name: WMT_RO-EN
121
  type: WMT_RO-EN
122
  metrics:
123
- - name: Average
124
  type: bleu
125
  value: 13.032499999999999
126
  - task:
@@ -129,7 +129,7 @@ model-index:
129
  name: WMT_EN-RO_finetuned
130
  type: WMT_EN-RO_finetuned
131
  metrics:
132
- - name: Average
133
  type: bleu
134
  value: 27.8468226696502
135
  - task:
@@ -138,7 +138,7 @@ model-index:
138
  name: WMT_RO-EN_finetuned
139
  type: WMT_RO-EN_finetuned
140
  metrics:
141
- - name: Average
142
  type: bleu
143
  value: 39.300054525566104
144
  - task:
@@ -147,7 +147,7 @@ model-index:
147
  name: XQuAD
148
  type: XQuAD
149
  metrics:
150
- - name: Average
151
  type: exact_match
152
  value: 30.14705
153
  - task:
@@ -156,7 +156,7 @@ model-index:
156
  name: XQuAD
157
  type: XQuAD
158
  metrics:
159
- - name: Average
160
  type: f1
161
  value: 47.0304
162
  - task:
@@ -165,7 +165,7 @@ model-index:
165
  name: XQuAD_finetuned
166
  type: XQuAD_finetuned
167
  metrics:
168
- - name: Average
169
  type: exact_match
170
  value: 67.0588235294117
171
  - task:
@@ -174,7 +174,7 @@ model-index:
174
  name: XQuAD
175
  type: XQuAD
176
  metrics:
177
- - name: Average
178
  type: f1
179
  value: 79.9624140326139
180
  - task:
@@ -183,7 +183,7 @@ model-index:
183
  name: STS
184
  type: STS
185
  metrics:
186
- - name: Average
187
  type: spearman
188
  value: 7.886666666666667
189
  - task:
@@ -192,7 +192,7 @@ model-index:
192
  name: STS
193
  type: STS
194
  metrics:
195
- - name: Average
196
  type: pearson
197
  value: 7.976666666666667
198
  - task:
@@ -201,7 +201,7 @@ model-index:
201
  name: STS_finetuned
202
  type: STS_finetuned
203
  metrics:
204
- - name: Average
205
  type: spearman
206
  value: 71.749068976055
207
  - task:
@@ -210,7 +210,7 @@ model-index:
210
  name: STS_finetuned
211
  type: STS_finetuned
212
  metrics:
213
- - name: Average
214
  type: pearson
215
  value: 71.9866084841066
216
  ---
 
12
  name: Romanian_Academic_Benchmarks
13
  type: Romanian_Academic_Benchmarks
14
  metrics:
15
+ - name: Average accuracy
16
  type: accuracy
17
  value: 38.033944444444444
18
  - task:
 
21
  name: OpenLLM-Ro/ro_arc_challenge
22
  type: OpenLLM-Ro/ro_arc_challenge
23
  metrics:
24
+ - name: Average accuracy
25
  type: accuracy
26
  value: 37.945
27
  - task:
 
30
  name: OpenLLM-Ro/ro_mmlu
31
  type: OpenLLM-Ro/ro_mmlu
32
  metrics:
33
+ - name: Average accuracy
34
  type: accuracy
35
  value: 27.2175
36
  - task:
 
39
  name: OpenLLM-Ro/ro_winogrande
40
  type: OpenLLM-Ro/ro_winogrande
41
  metrics:
42
+ - name: Average accuracy
43
  type: accuracy
44
  value: 59.2925
45
  - task:
 
48
  name: OpenLLM-Ro/ro_hellaswag
49
  type: OpenLLM-Ro/ro_hellaswag
50
  metrics:
51
+ - name: Average accuracy
52
  type: accuracy
53
  value: 57.222
54
  - task:
 
57
  name: OpenLLM-Ro/ro_gsm8k
58
  type: OpenLLM-Ro/ro_gsm8k
59
  metrics:
60
+ - name: Average accuracy
61
  type: accuracy
62
  value: 2.526666666666667
63
  - task:
 
66
  name: OpenLLM-Ro/ro_truthfulqa
67
  type: OpenLLM-Ro/ro_truthfulqa
68
  metrics:
69
+ - name: Average accuracy
70
  type: accuracy
71
  value: 44.0
72
  - task:
 
75
  name: LaRoSeDa_binary
76
  type: LaRoSeDa_binary
77
  metrics:
78
+ - name: Average macro-f1
79
  type: macro-f1
80
  value: 83.245
81
  - task:
 
84
  name: LaRoSeDa_multiclass
85
  type: LaRoSeDa_multiclass
86
  metrics:
87
+ - name: Average macro-f1
88
  type: macro-f1
89
  value: 61.040000000000006
90
  - task:
 
93
  name: LaRoSeDa_binary_finetuned
94
  type: LaRoSeDa_binary_finetuned
95
  metrics:
96
+ - name: Average macro-f1
97
  type: macro-f1
98
  value: 98.9666527738873
99
  - task:
 
102
  name: LaRoSeDa_multiclass_finetuned
103
  type: LaRoSeDa_multiclass_finetuned
104
  metrics:
105
+ - name: Average macro-f1
106
  type: macro-f1
107
  value: 87.7198050161592
108
  - task:
 
111
  name: WMT_EN-RO
112
  type: WMT_EN-RO
113
  metrics:
114
+ - name: Average bleu
115
  type: bleu
116
  value: 10.01
117
  - task:
 
120
  name: WMT_RO-EN
121
  type: WMT_RO-EN
122
  metrics:
123
+ - name: Average bleu
124
  type: bleu
125
  value: 13.032499999999999
126
  - task:
 
129
  name: WMT_EN-RO_finetuned
130
  type: WMT_EN-RO_finetuned
131
  metrics:
132
+ - name: Average bleu
133
  type: bleu
134
  value: 27.8468226696502
135
  - task:
 
138
  name: WMT_RO-EN_finetuned
139
  type: WMT_RO-EN_finetuned
140
  metrics:
141
+ - name: Average bleu
142
  type: bleu
143
  value: 39.300054525566104
144
  - task:
 
147
  name: XQuAD
148
  type: XQuAD
149
  metrics:
150
+ - name: Average exact_match
151
  type: exact_match
152
  value: 30.14705
153
  - task:
 
156
  name: XQuAD
157
  type: XQuAD
158
  metrics:
159
+ - name: Average f1
160
  type: f1
161
  value: 47.0304
162
  - task:
 
165
  name: XQuAD_finetuned
166
  type: XQuAD_finetuned
167
  metrics:
168
+ - name: Average exact_match
169
  type: exact_match
170
  value: 67.0588235294117
171
  - task:
 
174
  name: XQuAD
175
  type: XQuAD
176
  metrics:
177
+ - name: Average f1
178
  type: f1
179
  value: 79.9624140326139
180
  - task:
 
183
  name: STS
184
  type: STS
185
  metrics:
186
+ - name: Average spearman
187
  type: spearman
188
  value: 7.886666666666667
189
  - task:
 
192
  name: STS
193
  type: STS
194
  metrics:
195
+ - name: Average pearson
196
  type: pearson
197
  value: 7.976666666666667
198
  - task:
 
201
  name: STS_finetuned
202
  type: STS_finetuned
203
  metrics:
204
+ - name: Average spearman
205
  type: spearman
206
  value: 71.749068976055
207
  - task:
 
210
  name: STS_finetuned
211
  type: STS_finetuned
212
  metrics:
213
+ - name: Average pearson
214
  type: pearson
215
  value: 71.9866084841066
216
  ---