mihaimasala committed
Update README.md
README.md CHANGED
@@ -12,7 +12,7 @@ model-index:
       name: Romanian_Academic_Benchmarks
       type: Romanian_Academic_Benchmarks
     metrics:
-    - name: Average
+    - name: Average accuracy
       type: accuracy
       value: 38.033944444444444
   - task:
@@ -21,7 +21,7 @@ model-index:
       name: OpenLLM-Ro/ro_arc_challenge
       type: OpenLLM-Ro/ro_arc_challenge
     metrics:
-    - name: Average
+    - name: Average accuracy
       type: accuracy
       value: 37.945
   - task:
@@ -30,7 +30,7 @@ model-index:
       name: OpenLLM-Ro/ro_mmlu
       type: OpenLLM-Ro/ro_mmlu
     metrics:
-    - name: Average
+    - name: Average accuracy
       type: accuracy
       value: 27.2175
   - task:
@@ -39,7 +39,7 @@ model-index:
       name: OpenLLM-Ro/ro_winogrande
       type: OpenLLM-Ro/ro_winogrande
     metrics:
-    - name: Average
+    - name: Average accuracy
       type: accuracy
       value: 59.2925
   - task:
@@ -48,7 +48,7 @@ model-index:
       name: OpenLLM-Ro/ro_hellaswag
       type: OpenLLM-Ro/ro_hellaswag
     metrics:
-    - name: Average
+    - name: Average accuracy
       type: accuracy
       value: 57.222
   - task:
@@ -57,7 +57,7 @@ model-index:
       name: OpenLLM-Ro/ro_gsm8k
       type: OpenLLM-Ro/ro_gsm8k
     metrics:
-    - name: Average
+    - name: Average accuracy
       type: accuracy
       value: 2.526666666666667
   - task:
@@ -66,7 +66,7 @@ model-index:
       name: OpenLLM-Ro/ro_truthfulqa
       type: OpenLLM-Ro/ro_truthfulqa
     metrics:
-    - name: Average
+    - name: Average accuracy
       type: accuracy
       value: 44.0
   - task:
@@ -75,7 +75,7 @@ model-index:
       name: LaRoSeDa_binary
       type: LaRoSeDa_binary
     metrics:
-    - name: Average
+    - name: Average macro-f1
       type: macro-f1
       value: 83.245
   - task:
@@ -84,7 +84,7 @@ model-index:
       name: LaRoSeDa_multiclass
       type: LaRoSeDa_multiclass
     metrics:
-    - name: Average
+    - name: Average macro-f1
       type: macro-f1
       value: 61.040000000000006
   - task:
@@ -93,7 +93,7 @@ model-index:
       name: LaRoSeDa_binary_finetuned
       type: LaRoSeDa_binary_finetuned
     metrics:
-    - name: Average
+    - name: Average macro-f1
       type: macro-f1
       value: 98.9666527738873
   - task:
@@ -102,7 +102,7 @@ model-index:
       name: LaRoSeDa_multiclass_finetuned
       type: LaRoSeDa_multiclass_finetuned
     metrics:
-    - name: Average
+    - name: Average macro-f1
       type: macro-f1
       value: 87.7198050161592
   - task:
@@ -111,7 +111,7 @@ model-index:
       name: WMT_EN-RO
       type: WMT_EN-RO
     metrics:
-    - name: Average
+    - name: Average bleu
       type: bleu
       value: 10.01
   - task:
@@ -120,7 +120,7 @@ model-index:
       name: WMT_RO-EN
       type: WMT_RO-EN
     metrics:
-    - name: Average
+    - name: Average bleu
       type: bleu
       value: 13.032499999999999
   - task:
@@ -129,7 +129,7 @@ model-index:
       name: WMT_EN-RO_finetuned
       type: WMT_EN-RO_finetuned
     metrics:
-    - name: Average
+    - name: Average bleu
       type: bleu
       value: 27.8468226696502
   - task:
@@ -138,7 +138,7 @@ model-index:
       name: WMT_RO-EN_finetuned
       type: WMT_RO-EN_finetuned
     metrics:
-    - name: Average
+    - name: Average bleu
       type: bleu
       value: 39.300054525566104
   - task:
@@ -147,7 +147,7 @@ model-index:
       name: XQuAD
       type: XQuAD
     metrics:
-    - name: Average
+    - name: Average exact_match
       type: exact_match
       value: 30.14705
   - task:
@@ -156,7 +156,7 @@ model-index:
       name: XQuAD
       type: XQuAD
     metrics:
-    - name: Average
+    - name: Average f1
       type: f1
       value: 47.0304
   - task:
@@ -165,7 +165,7 @@ model-index:
       name: XQuAD_finetuned
       type: XQuAD_finetuned
     metrics:
-    - name: Average
+    - name: Average exact_match
       type: exact_match
       value: 67.0588235294117
   - task:
@@ -174,7 +174,7 @@ model-index:
       name: XQuAD
       type: XQuAD
     metrics:
-    - name: Average
+    - name: Average f1
       type: f1
       value: 79.9624140326139
   - task:
@@ -183,7 +183,7 @@ model-index:
       name: STS
       type: STS
     metrics:
-    - name: Average
+    - name: Average spearman
       type: spearman
       value: 7.886666666666667
   - task:
@@ -192,7 +192,7 @@ model-index:
       name: STS
       type: STS
     metrics:
-    - name: Average
+    - name: Average pearson
       type: pearson
       value: 7.976666666666667
   - task:
@@ -201,7 +201,7 @@ model-index:
       name: STS_finetuned
       type: STS_finetuned
     metrics:
-    - name: Average
+    - name: Average spearman
       type: spearman
       value: 71.749068976055
   - task:
@@ -210,7 +210,7 @@ model-index:
       name: STS_finetuned
       type: STS_finetuned
     metrics:
-    - name: Average
+    - name: Average pearson
       type: pearson
       value: 71.9866084841066
 ---
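The commit renames every `model-index` metric from the generic "Average" to a name that also carries the metric type (e.g. "Average accuracy", "Average macro-f1"), so the metric names render unambiguously on the Hub leaderboard widgets. Below is a minimal sketch of how one might verify the rename took effect, using the `huggingface_hub` model-card API; the repo id is hypothetical, since the diff alone does not say which OpenLLM-Ro model this card belongs to.

```python
# Minimal verification sketch, assuming the `huggingface_hub` package.
# The repo id below is a placeholder -- substitute the actual model repo.
from huggingface_hub import ModelCard

card = ModelCard.load("OpenLLM-Ro/RoLlama2-7b-Instruct")  # hypothetical repo id

# `eval_results` is the parsed form of the YAML `model-index` block; after
# this commit each metric_name should read e.g. "Average accuracy" rather
# than the bare "Average".
for res in card.data.eval_results or []:
    print(f"{res.dataset_name}: {res.metric_name} ({res.metric_type}) = {res.metric_value}")
```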