mihaimasala committed on
Commit
812b32d
1 Parent(s): c12ab60

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +443 -305
README.md CHANGED
@@ -4,311 +4,449 @@ language:
4
  - ro
5
  base_model: meta-llama/Llama-2-7b-hf
6
  model-index:
7
- - name: OpenLLM-Ro/RoLlama2-7b-Base
8
- results:
9
- - task:
10
- type: text-generation
11
- dataset:
12
- name: Romanian_Academic_Benchmarks
13
- type: Romanian_Academic_Benchmarks
14
- metrics:
15
- - name: Average accuracy
16
- type: accuracy
17
- value: 38.033944444444444
18
- - task:
19
- type: text-generation
20
- dataset:
21
- name: OpenLLM-Ro/ro_arc_challenge
22
- type: OpenLLM-Ro/ro_arc_challenge
23
- metrics:
24
- - name: Average accuracy
25
- type: accuracy
26
- value: 37.945
27
- - task:
28
- type: text-generation
29
- dataset:
30
- name: OpenLLM-Ro/ro_mmlu
31
- type: OpenLLM-Ro/ro_mmlu
32
- metrics:
33
- - name: Average accuracy
34
- type: accuracy
35
- value: 27.2175
36
- - task:
37
- type: text-generation
38
- dataset:
39
- name: OpenLLM-Ro/ro_winogrande
40
- type: OpenLLM-Ro/ro_winogrande
41
- metrics:
42
- - name: Average accuracy
43
- type: accuracy
44
- value: 59.2925
45
- - task:
46
- type: text-generation
47
- dataset:
48
- name: OpenLLM-Ro/ro_hellaswag
49
- type: OpenLLM-Ro/ro_hellaswag
50
- metrics:
51
- - name: Average accuracy
52
- type: accuracy
53
- value: 57.222
54
- - task:
55
- type: text-generation
56
- dataset:
57
- name: OpenLLM-Ro/ro_gsm8k
58
- type: OpenLLM-Ro/ro_gsm8k
59
- metrics:
60
- - name: Average accuracy
61
- type: accuracy
62
- value: 2.526666666666667
63
- - task:
64
- type: text-generation
65
- dataset:
66
- name: OpenLLM-Ro/ro_truthfulqa
67
- type: OpenLLM-Ro/ro_truthfulqa
68
- metrics:
69
- - name: Average accuracy
70
- type: accuracy
71
- value: 44.0
72
- - task:
73
- type: text-generation
74
- dataset:
75
- name: LaRoSeDa_binary
76
- type: LaRoSeDa_binary
77
- metrics:
78
- - name: Average macro-f1
79
- type: macro-f1
80
- value: 83.245
81
- - task:
82
- type: text-generation
83
- dataset:
84
- name: LaRoSeDa_multiclass
85
- type: LaRoSeDa_multiclass
86
- metrics:
87
- - name: Average macro-f1
88
- type: macro-f1
89
- value: 61.040000000000006
90
- - task:
91
- type: text-generation
92
- dataset:
93
- name: LaRoSeDa_binary_finetuned
94
- type: LaRoSeDa_binary_finetuned
95
- metrics:
96
- - name: Average macro-f1
97
- type: macro-f1
98
- value: 98.9666527738873
99
- - task:
100
- type: text-generation
101
- dataset:
102
- name: LaRoSeDa_multiclass_finetuned
103
- type: LaRoSeDa_multiclass_finetuned
104
- metrics:
105
- - name: Average macro-f1
106
- type: macro-f1
107
- value: 87.7198050161592
108
- - task:
109
- type: text-generation
110
- dataset:
111
- name: WMT_EN-RO
112
- type: WMT_EN-RO
113
- metrics:
114
- - name: Average bleu
115
- type: bleu
116
- value: 10.01
117
- - task:
118
- type: text-generation
119
- dataset:
120
- name: WMT_RO-EN
121
- type: WMT_RO-EN
122
- metrics:
123
- - name: Average bleu
124
- type: bleu
125
- value: 13.032499999999999
126
- - task:
127
- type: text-generation
128
- dataset:
129
- name: WMT_EN-RO_finetuned
130
- type: WMT_EN-RO_finetuned
131
- metrics:
132
- - name: Average bleu
133
- type: bleu
134
- value: 27.8468226696502
135
- - task:
136
- type: text-generation
137
- dataset:
138
- name: WMT_RO-EN_finetuned
139
- type: WMT_RO-EN_finetuned
140
- metrics:
141
- - name: Average bleu
142
- type: bleu
143
- value: 39.300054525566104
144
- - task:
145
- type: text-generation
146
- dataset:
147
- name: XQuAD
148
- type: XQuAD
149
- metrics:
150
- - name: Average exact_match
151
- type: exact_match
152
- value: 30.14705
153
- - task:
154
- type: text-generation
155
- dataset:
156
- name: XQuAD
157
- type: XQuAD
158
- metrics:
159
- - name: Average f1
160
- type: f1
161
- value: 47.0304
162
- - task:
163
- type: text-generation
164
- dataset:
165
- name: XQuAD_finetuned
166
- type: XQuAD_finetuned
167
- metrics:
168
- - name: Average exact_match
169
- type: exact_match
170
- value: 67.0588235294117
171
- - task:
172
- type: text-generation
173
- dataset:
174
- name: XQuAD
175
- type: XQuAD
176
- metrics:
177
- - name: Average f1
178
- type: f1
179
- value: 79.9624140326139
180
- - task:
181
- type: text-generation
182
- dataset:
183
- name: STS
184
- type: STS
185
- metrics:
186
- - name: Average spearman
187
- type: spearman
188
- value: 7.886666666666667
189
- - task:
190
- type: text-generation
191
- dataset:
192
- name: STS
193
- type: STS
194
- metrics:
195
- - name: Average pearson
196
- type: pearson
197
- value: 7.976666666666667
198
- - task:
199
- type: text-generation
200
- dataset:
201
- name: STS_finetuned
202
- type: STS_finetuned
203
- metrics:
204
- - name: Average spearman
205
- type: spearman
206
- value: 71.749068976055
207
- - task:
208
- type: text-generation
209
- dataset:
210
- name: STS_finetuned
211
- type: STS_finetuned
212
- metrics:
213
- - name: Average pearson
214
- type: pearson
215
- value: 71.9866084841066
216
- - task:
217
- type: text-generation
218
- dataset:
219
- name: OpenLLM-Ro/ro_arc_challenge
220
- type: OpenLLM-Ro/ro_arc_challenge
221
- metrics:
222
- - name: 0-shot
223
- type: accuracy
224
- value: 35.56
225
- - name: 1-shot
226
- type: accuracy
227
- value: 36.42
228
- - name: 3-shot
229
- type: accuracy
230
- value: 38.56
231
- - name: 5-shot
232
- type: accuracy
233
- value: 38.39
234
- - name: 10-shot
235
- type: accuracy
236
- value: 39.07
237
- - name: 25-shot
238
- type: accuracy
239
- value: 39.67
240
- - task:
241
- type: text-generation
242
- dataset:
243
- name: OpenLLM-Ro/ro_mmlu
244
- type: OpenLLM-Ro/ro_mmlu
245
- metrics:
246
- - name: 0-shot
247
- type: accuracy
248
- value: 25.82
249
- - name: 1-shot
250
- type: accuracy
251
- value: 25.48
252
- - name: 3-shot
253
- type: accuracy
254
- value: 27.61
255
- - name: 5-shot
256
- type: accuracy
257
- value: 29.96
258
- - task:
259
- type: text-generation
260
- dataset:
261
- name: OpenLLM-Ro/ro_winogrande
262
- type: OpenLLM-Ro/ro_winogrande
263
- metrics:
264
- - name: 0-shot
265
- type: accuracy
266
- value: 58.72
267
- - name: 1-shot
268
- type: accuracy
269
- value: 58.88
270
- - name: 3-shot
271
- type: accuracy
272
- value: 60.38
273
- - name: 5-shot
274
- type: accuracy
275
- value: 59.19
276
- - task:
277
- type: text-generation
278
- dataset:
279
- name: OpenLLM-Ro/ro_hellaswag
280
- type: OpenLLM-Ro/ro_hellaswag
281
- metrics:
282
- - name: 0-shot
283
- type: accuracy
284
- value: 55.85
285
- - name: 1-shot
286
- type: accuracy
287
- value: 57.06
288
- - name: 3-shot
289
- type: accuracy
290
- value: 57.52
291
- - name: 5-shot
292
- type: accuracy
293
- value: 57.89
294
- - name: 10-shot
295
- type: accuracy
296
- value: 57.79
297
- - task:
298
- type: text-generation
299
- dataset:
300
- name: OpenLLM-Ro/ro_gsm8k
301
- type: OpenLLM-Ro/ro_gsm8k
302
- metrics:
303
- - name: 0-shot
304
- type: accuracy
305
- value: 0.0
306
- - name: 1-shot
307
- type: accuracy
308
- value: 2.96
309
- - name: 3-shot
310
- type: accuracy
311
- value: 4.62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
  ---
313
 
314
  # Model Card for Model ID
 
4
  - ro
5
  base_model: meta-llama/Llama-2-7b-hf
6
  model-index:
7
+ - name: OpenLLM-Ro/RoLlama2-7b-Base
8
+ results:
9
+ - task:
10
+ type: text-generation
11
+ dataset:
12
+ name: Romanian_Academic_Benchmarks
13
+ type: Romanian_Academic_Benchmarks
14
+ metrics:
15
+ - name: Average accuracy
16
+ type: accuracy
17
+ value: 38.033944444444444
18
+ - task:
19
+ type: text-generation
20
+ dataset:
21
+ name: OpenLLM-Ro/ro_arc_challenge
22
+ type: OpenLLM-Ro/ro_arc_challenge
23
+ metrics:
24
+ - name: Average accuracy
25
+ type: accuracy
26
+ value: 37.945
27
+ - task:
28
+ type: text-generation
29
+ dataset:
30
+ name: OpenLLM-Ro/ro_mmlu
31
+ type: OpenLLM-Ro/ro_mmlu
32
+ metrics:
33
+ - name: Average accuracy
34
+ type: accuracy
35
+ value: 27.2175
36
+ - task:
37
+ type: text-generation
38
+ dataset:
39
+ name: OpenLLM-Ro/ro_winogrande
40
+ type: OpenLLM-Ro/ro_winogrande
41
+ metrics:
42
+ - name: Average accuracy
43
+ type: accuracy
44
+ value: 59.2925
45
+ - task:
46
+ type: text-generation
47
+ dataset:
48
+ name: OpenLLM-Ro/ro_hellaswag
49
+ type: OpenLLM-Ro/ro_hellaswag
50
+ metrics:
51
+ - name: Average accuracy
52
+ type: accuracy
53
+ value: 57.222
54
+ - task:
55
+ type: text-generation
56
+ dataset:
57
+ name: OpenLLM-Ro/ro_gsm8k
58
+ type: OpenLLM-Ro/ro_gsm8k
59
+ metrics:
60
+ - name: Average accuracy
61
+ type: accuracy
62
+ value: 2.526666666666667
63
+ - task:
64
+ type: text-generation
65
+ dataset:
66
+ name: OpenLLM-Ro/ro_truthfulqa
67
+ type: OpenLLM-Ro/ro_truthfulqa
68
+ metrics:
69
+ - name: Average accuracy
70
+ type: accuracy
71
+ value: 44.0
72
+ - task:
73
+ type: text-generation
74
+ dataset:
75
+ name: LaRoSeDa_binary
76
+ type: LaRoSeDa_binary
77
+ metrics:
78
+ - name: Average macro-f1
79
+ type: macro-f1
80
+ value: 83.245
81
+ - task:
82
+ type: text-generation
83
+ dataset:
84
+ name: LaRoSeDa_multiclass
85
+ type: LaRoSeDa_multiclass
86
+ metrics:
87
+ - name: Average macro-f1
88
+ type: macro-f1
89
+ value: 61.040000000000006
90
+ - task:
91
+ type: text-generation
92
+ dataset:
93
+ name: LaRoSeDa_binary_finetuned
94
+ type: LaRoSeDa_binary_finetuned
95
+ metrics:
96
+ - name: Average macro-f1
97
+ type: macro-f1
98
+ value: 98.9666527738873
99
+ - task:
100
+ type: text-generation
101
+ dataset:
102
+ name: LaRoSeDa_multiclass_finetuned
103
+ type: LaRoSeDa_multiclass_finetuned
104
+ metrics:
105
+ - name: Average macro-f1
106
+ type: macro-f1
107
+ value: 87.7198050161592
108
+ - task:
109
+ type: text-generation
110
+ dataset:
111
+ name: WMT_EN-RO
112
+ type: WMT_EN-RO
113
+ metrics:
114
+ - name: Average bleu
115
+ type: bleu
116
+ value: 10.01
117
+ - task:
118
+ type: text-generation
119
+ dataset:
120
+ name: WMT_RO-EN
121
+ type: WMT_RO-EN
122
+ metrics:
123
+ - name: Average bleu
124
+ type: bleu
125
+ value: 13.032499999999999
126
+ - task:
127
+ type: text-generation
128
+ dataset:
129
+ name: WMT_EN-RO_finetuned
130
+ type: WMT_EN-RO_finetuned
131
+ metrics:
132
+ - name: Average bleu
133
+ type: bleu
134
+ value: 27.8468226696502
135
+ - task:
136
+ type: text-generation
137
+ dataset:
138
+ name: WMT_RO-EN_finetuned
139
+ type: WMT_RO-EN_finetuned
140
+ metrics:
141
+ - name: Average bleu
142
+ type: bleu
143
+ value: 39.300054525566104
144
+ - task:
145
+ type: text-generation
146
+ dataset:
147
+ name: XQuAD
148
+ type: XQuAD
149
+ metrics:
150
+ - name: Average exact_match
151
+ type: exact_match
152
+ value: 30.14705
153
+ - task:
154
+ type: text-generation
155
+ dataset:
156
+ name: XQuAD
157
+ type: XQuAD
158
+ metrics:
159
+ - name: Average f1
160
+ type: f1
161
+ value: 47.0304
162
+ - task:
163
+ type: text-generation
164
+ dataset:
165
+ name: XQuAD_finetuned
166
+ type: XQuAD_finetuned
167
+ metrics:
168
+ - name: Average exact_match
169
+ type: exact_match
170
+ value: 67.0588235294117
171
+ - task:
172
+ type: text-generation
173
+ dataset:
174
+ name: XQuAD_finetuned
175
+ type: XQuAD_finetuned
176
+ metrics:
177
+ - name: Average f1
178
+ type: f1
179
+ value: 79.9624140326139
180
+ - task:
181
+ type: text-generation
182
+ dataset:
183
+ name: STS
184
+ type: STS
185
+ metrics:
186
+ - name: Average spearman
187
+ type: spearman
188
+ value: 7.886666666666667
189
+ - task:
190
+ type: text-generation
191
+ dataset:
192
+ name: STS
193
+ type: STS
194
+ metrics:
195
+ - name: Average pearson
196
+ type: pearson
197
+ value: 7.976666666666667
198
+ - task:
199
+ type: text-generation
200
+ dataset:
201
+ name: STS_finetuned
202
+ type: STS_finetuned
203
+ metrics:
204
+ - name: Average spearman
205
+ type: spearman
206
+ value: 71.749068976055
207
+ - task:
208
+ type: text-generation
209
+ dataset:
210
+ name: STS_finetuned
211
+ type: STS_finetuned
212
+ metrics:
213
+ - name: Average pearson
214
+ type: pearson
215
+ value: 71.9866084841066
216
+ - task:
217
+ type: text-generation
218
+ dataset:
219
+ name: OpenLLM-Ro/ro_arc_challenge
220
+ type: OpenLLM-Ro/ro_arc_challenge
221
+ metrics:
222
+ - name: 0-shot
223
+ type: accuracy
224
+ value: 35.56
225
+ - name: 1-shot
226
+ type: accuracy
227
+ value: 36.42
228
+ - name: 3-shot
229
+ type: accuracy
230
+ value: 38.56
231
+ - name: 5-shot
232
+ type: accuracy
233
+ value: 38.39
234
+ - name: 10-shot
235
+ type: accuracy
236
+ value: 39.07
237
+ - name: 25-shot
238
+ type: accuracy
239
+ value: 39.67
240
+ - task:
241
+ type: text-generation
242
+ dataset:
243
+ name: OpenLLM-Ro/ro_mmlu
244
+ type: OpenLLM-Ro/ro_mmlu
245
+ metrics:
246
+ - name: 0-shot
247
+ type: accuracy
248
+ value: 25.82
249
+ - name: 1-shot
250
+ type: accuracy
251
+ value: 25.48
252
+ - name: 3-shot
253
+ type: accuracy
254
+ value: 27.61
255
+ - name: 5-shot
256
+ type: accuracy
257
+ value: 29.96
258
+ - task:
259
+ type: text-generation
260
+ dataset:
261
+ name: OpenLLM-Ro/ro_winogrande
262
+ type: OpenLLM-Ro/ro_winogrande
263
+ metrics:
264
+ - name: 0-shot
265
+ type: accuracy
266
+ value: 58.72
267
+ - name: 1-shot
268
+ type: accuracy
269
+ value: 58.88
270
+ - name: 3-shot
271
+ type: accuracy
272
+ value: 60.38
273
+ - name: 5-shot
274
+ type: accuracy
275
+ value: 59.19
276
+ - task:
277
+ type: text-generation
278
+ dataset:
279
+ name: OpenLLM-Ro/ro_hellaswag
280
+ type: OpenLLM-Ro/ro_hellaswag
281
+ metrics:
282
+ - name: 0-shot
283
+ type: accuracy
284
+ value: 55.85
285
+ - name: 1-shot
286
+ type: accuracy
287
+ value: 57.06
288
+ - name: 3-shot
289
+ type: accuracy
290
+ value: 57.52
291
+ - name: 5-shot
292
+ type: accuracy
293
+ value: 57.89
294
+ - name: 10-shot
295
+ type: accuracy
296
+ value: 57.79
297
+ - task:
298
+ type: text-generation
299
+ dataset:
300
+ name: OpenLLM-Ro/ro_gsm8k
301
+ type: OpenLLM-Ro/ro_gsm8k
302
+ metrics:
303
+ - name: 0-shot
304
+ type: accuracy
305
+ value: 0.0
306
+ - name: 1-shot
307
+ type: accuracy
308
+ value: 2.96
309
+ - name: 3-shot
310
+ type: accuracy
311
+ value: 4.62
312
+ - task:
313
+ type: text-generation
314
+ dataset:
315
+ name: LaRoSeDa_binary
316
+ type: LaRoSeDa_binary
317
+ metrics:
318
+ - name: 0-shot
319
+ type: macro-f1
320
+ value: 42.78
321
+ - name: 1-shot
322
+ type: macro-f1
323
+ value: 98.0
324
+ - name: 3-shot
325
+ type: macro-f1
326
+ value: 95.13
327
+ - name: 5-shot
328
+ type: macro-f1
329
+ value: 97.07
330
+ - task:
331
+ type: text-generation
332
+ dataset:
333
+ name: LaRoSeDa_multiclass
334
+ type: LaRoSeDa_multiclass
335
+ metrics:
336
+ - name: 0-shot
337
+ type: macro-f1
338
+ value: 46.41
339
+ - name: 1-shot
340
+ type: macro-f1
341
+ value: 67.36
342
+ - name: 3-shot
343
+ type: macro-f1
344
+ value: 65.16
345
+ - name: 5-shot
346
+ type: macro-f1
347
+ value: 65.23
348
+ - task:
349
+ type: text-generation
350
+ dataset:
351
+ name: WMT_EN-RO
352
+ type: WMT_EN-RO
353
+ metrics:
354
+ - name: 0-shot
355
+ type: bleu
356
+ value: 4.45
357
+ - name: 1-shot
358
+ type: bleu
359
+ value: 8.61
360
+ - name: 3-shot
361
+ type: bleu
362
+ value: 12.25
363
+ - name: 5-shot
364
+ type: bleu
365
+ value: 14.73
366
+ - task:
367
+ type: text-generation
368
+ dataset:
369
+ name: WMT_RO-EN
370
+ type: WMT_RO-EN
371
+ metrics:
372
+ - name: 0-shot
373
+ type: bleu
374
+ value: 1.29
375
+ - name: 1-shot
376
+ type: bleu
377
+ value: 10.78
378
+ - name: 3-shot
379
+ type: bleu
380
+ value: 16.82
381
+ - name: 5-shot
382
+ type: bleu
383
+ value: 23.24
384
+ - task:
385
+ type: text-generation
386
+ dataset:
387
+ name: XQuAD_EM
388
+ type: XQuAD_EM
389
+ metrics:
390
+ - name: 0-shot
391
+ type: exact_match
392
+ value: 5.2941
393
+ - name: 1-shot
394
+ type: exact_match
395
+ value: 33.9496
396
+ - name: 3-shot
397
+ type: exact_match
398
+ value: 39.2437
399
+ - name: 5-shot
400
+ type: exact_match
401
+ value: 42.1008
402
+ - task:
403
+ type: text-generation
404
+ dataset:
405
+ name: XQuAD_F1
406
+ type: XQuAD_F1
407
+ metrics:
408
+ - name: 0-shot
409
+ type: f1
410
+ value: 16.1686
411
+ - name: 1-shot
412
+ type: f1
413
+ value: 51.8437
414
+ - name: 3-shot
415
+ type: f1
416
+ value: 58.8226
417
+ - name: 5-shot
418
+ type: f1
419
+ value: 61.2867
420
+ - task:
421
+ type: text-generation
422
+ dataset:
423
+ name: STS
424
+ type: STS
425
+ metrics:
426
+ - name: 0-shot
427
+ type: spearman
428
+ value: -1.74
429
+ - name: 1-shot
430
+ type: spearman
431
+ value: 15.47
432
+ - name: 3-shot
433
+ type: spearman
434
+ value: 9.93
435
+ - task:
436
+ type: text-generation
437
+ dataset:
438
+ name: STS
439
+ type: STS
440
+ metrics:
441
+ - name: 0-shot
442
+ type: pearson
443
+ value: -1.4
444
+ - name: 1-shot
445
+ type: pearson
446
+ value: 15.0
447
+ - name: 3-shot
448
+ type: pearson
449
+ value: 10.33
450
  ---
451
 
452
  # Model Card for Model ID