OpenLLM-Ro
/

RoLlama2-7b-Base-2024-05-14

@@ -4,311 +4,449 @@ language:
 - ro
 base_model: meta-llama/Llama-2-7b-hf
 model-index:
-        - name: OpenLLM-Ro/RoLlama2-7b-Base
-          results:
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: Romanian_Academic_Benchmarks
-                        type: Romanian_Academic_Benchmarks
-                  metrics:
-                        - name: Average accuracy
-                          type: accuracy
-                          value: 38.033944444444444
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: OpenLLM-Ro/ro_arc_challenge
-                        type: OpenLLM-Ro/ro_arc_challenge
-                  metrics:
-                        - name: Average accuracy
-                          type: accuracy
-                          value: 37.945
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: OpenLLM-Ro/ro_mmlu
-                        type: OpenLLM-Ro/ro_mmlu
-                  metrics:
-                        - name: Average accuracy
-                          type: accuracy
-                          value: 27.2175
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: OpenLLM-Ro/ro_winogrande
-                        type: OpenLLM-Ro/ro_winogrande
-                  metrics:
-                        - name: Average accuracy
-                          type: accuracy
-                          value: 59.2925
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: OpenLLM-Ro/ro_hellaswag
-                        type: OpenLLM-Ro/ro_hellaswag
-                  metrics:
-                        - name: Average accuracy
-                          type: accuracy
-                          value: 57.222
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: OpenLLM-Ro/ro_gsm8k
-                        type: OpenLLM-Ro/ro_gsm8k
-                  metrics:
-                        - name: Average accuracy
-                          type: accuracy
-                          value: 2.526666666666667
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: OpenLLM-Ro/ro_truthfulqa
-                        type: OpenLLM-Ro/ro_truthfulqa
-                  metrics:
-                        - name: Average accuracy
-                          type: accuracy
-                          value: 44.0
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: LaRoSeDa_binary
-                        type: LaRoSeDa_binary
-                  metrics:
-                        - name: Average macro-f1
-                          type: macro-f1
-                          value: 83.245
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: LaRoSeDa_multiclass
-                        type: LaRoSeDa_multiclass
-                  metrics:
-                        - name: Average macro-f1
-                          type: macro-f1
-                          value: 61.040000000000006
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: LaRoSeDa_binary_finetuned
-                        type: LaRoSeDa_binary_finetuned
-                  metrics:
-                        - name: Average macro-f1
-                          type: macro-f1
-                          value: 98.9666527738873
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: LaRoSeDa_multiclass_finetuned
-                        type: LaRoSeDa_multiclass_finetuned
-                  metrics:
-                        - name: Average macro-f1
-                          type: macro-f1
-                          value: 87.7198050161592
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: WMT_EN-RO
-                        type: WMT_EN-RO
-                  metrics:
-                        - name: Average bleu
-                          type: bleu
-                          value: 10.01
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: WMT_RO-EN
-                        type: WMT_RO-EN
-                  metrics:
-                        - name: Average bleu
-                          type: bleu
-                          value: 13.032499999999999
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: WMT_EN-RO_finetuned
-                        type: WMT_EN-RO_finetuned
-                  metrics:
-                        - name: Average bleu
-                          type: bleu
-                          value: 27.8468226696502
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: WMT_RO-EN_finetuned
-                        type: WMT_RO-EN_finetuned
-                  metrics:
-                        - name: Average bleu
-                          type: bleu
-                          value: 39.300054525566104
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: XQuAD
-                        type: XQuAD
-                  metrics:
-                        - name: Average exact_match
-                          type: exact_match
-                          value: 30.14705
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: XQuAD
-                        type: XQuAD
-                  metrics:
-                        - name: Average f1
-                          type: f1
-                          value: 47.0304
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: XQuAD_finetuned
-                        type: XQuAD_finetuned
-                  metrics:
-                        - name: Average exact_match
-                          type: exact_match
-                          value: 67.0588235294117
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: XQuAD
-                        type: XQuAD
-                  metrics:
-                        - name: Average f1
-                          type: f1
-                          value: 79.9624140326139
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: STS
-                        type: STS
-                  metrics:
-                        - name: Average spearman
-                          type: spearman
-                          value: 7.886666666666667
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: STS
-                        type: STS
-                  metrics:
-                        - name: Average pearson
-                          type: pearson
-                          value: 7.976666666666667
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: STS_finetuned
-                        type: STS_finetuned
-                  metrics:
-                        - name: Average spearman
-                          type: spearman
-                          value: 71.749068976055
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: STS_finetuned
-                        type: STS_finetuned
-                  metrics:
-                        - name: Average pearson
-                          type: pearson
-                          value: 71.9866084841066
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: OpenLLM-Ro/ro_arc_challenge
-                        type: OpenLLM-Ro/ro_arc_challenge
-                  metrics:
-                        - name: 0-shot
-                          type: accuracy
-                          value: 35.56
-                        - name: 1-shot
-                          type: accuracy
-                          value: 36.42
-                        - name: 3-shot
-                          type: accuracy
-                          value: 38.56
-                        - name: 5-shot
-                          type: accuracy
-                          value: 38.39
-                        - name: 10-shot
-                          type: accuracy
-                          value: 39.07
-                        - name: 25-shot
-                          type: accuracy
-                          value: 39.67
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: OpenLLM-Ro/ro_mmlu
-                        type: OpenLLM-Ro/ro_mmlu
-                  metrics:
-                        - name: 0-shot
-                          type: accuracy
-                          value: 25.82
-                        - name: 1-shot
-                          type: accuracy
-                          value: 25.48
-                        - name: 3-shot
-                          type: accuracy
-                          value: 27.61
-                        - name: 5-shot
-                          type: accuracy
-                          value: 29.96
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: OpenLLM-Ro/ro_winogrande
-                        type: OpenLLM-Ro/ro_winogrande
-                  metrics:
-                        - name: 0-shot
-                          type: accuracy
-                          value: 58.72
-                        - name: 1-shot
-                          type: accuracy
-                          value: 58.88
-                        - name: 3-shot
-                          type: accuracy
-                          value: 60.38
-                        - name: 5-shot
-                          type: accuracy
-                          value: 59.19
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: OpenLLM-Ro/ro_hellaswag
-                        type: OpenLLM-Ro/ro_hellaswag
-                  metrics:
-                        - name: 0-shot
-                          type: accuracy
-                          value: 55.85
-                        - name: 1-shot
-                          type: accuracy
-                          value: 57.06
-                        - name: 3-shot
-                          type: accuracy
-                          value: 57.52
-                        - name: 5-shot
-                          type: accuracy
-                          value: 57.89
-                        - name: 10-shot
-                          type: accuracy
-                          value: 57.79
-                - task:
-                        type: text-generation
-                  dataset:
-                        name: OpenLLM-Ro/ro_gsm8k
-                        type: OpenLLM-Ro/ro_gsm8k
-                  metrics:
-                        - name: 0-shot
-                          type: accuracy
-                          value: 0.0
-                        - name: 1-shot
-                          type: accuracy
-                          value: 2.96
-                        - name: 3-shot
-                          type: accuracy
-                          value: 4.62
 ---
 # Model Card for Model ID

 - ro
 base_model: meta-llama/Llama-2-7b-hf
 model-index:
+    - name: OpenLLM-Ro/RoLlama2-7b-Base
+      results:
+        - task:
+            type: text-generation
+          dataset:
+            name: Romanian_Academic_Benchmarks
+            type: Romanian_Academic_Benchmarks
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 38.033944444444444
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_arc_challenge
+            type: OpenLLM-Ro/ro_arc_challenge
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 37.945
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_mmlu
+            type: OpenLLM-Ro/ro_mmlu
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 27.2175
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_winogrande
+            type: OpenLLM-Ro/ro_winogrande
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 59.2925
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_hellaswag
+            type: OpenLLM-Ro/ro_hellaswag
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 57.222
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_gsm8k
+            type: OpenLLM-Ro/ro_gsm8k
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 2.526666666666667
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_truthfulqa
+            type: OpenLLM-Ro/ro_truthfulqa
+          metrics:
+            - name: Average accuracy
+              type: accuracy
+              value: 44.0
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_binary
+            type: LaRoSeDa_binary
+          metrics:
+            - name: Average macro-f1
+              type: macro-f1
+              value: 83.245
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_multiclass
+            type: LaRoSeDa_multiclass
+          metrics:
+            - name: Average macro-f1
+              type: macro-f1
+              value: 61.040000000000006
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_binary_finetuned
+            type: LaRoSeDa_binary_finetuned
+          metrics:
+            - name: Average macro-f1
+              type: macro-f1
+              value: 98.9666527738873
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_multiclass_finetuned
+            type: LaRoSeDa_multiclass_finetuned
+          metrics:
+            - name: Average macro-f1
+              type: macro-f1
+              value: 87.7198050161592
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_EN-RO
+            type: WMT_EN-RO
+          metrics:
+            - name: Average bleu
+              type: bleu
+              value: 10.01
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_RO-EN
+            type: WMT_RO-EN
+          metrics:
+            - name: Average bleu
+              type: bleu
+              value: 13.032499999999999
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_EN-RO_finetuned
+            type: WMT_EN-RO_finetuned
+          metrics:
+            - name: Average bleu
+              type: bleu
+              value: 27.8468226696502
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_RO-EN_finetuned
+            type: WMT_RO-EN_finetuned
+          metrics:
+            - name: Average bleu
+              type: bleu
+              value: 39.300054525566104
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD
+            type: XQuAD
+          metrics:
+            - name: Average exact_match
+              type: exact_match
+              value: 30.14705
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD
+            type: XQuAD
+          metrics:
+            - name: Average f1
+              type: f1
+              value: 47.0304
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD_finetuned
+            type: XQuAD_finetuned
+          metrics:
+            - name: Average exact_match
+              type: exact_match
+              value: 67.0588235294117
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD_finetuned
+            type: XQuAD_finetuned
+          metrics:
+            - name: Average f1
+              type: f1
+              value: 79.9624140326139
+        - task:
+            type: text-generation
+          dataset:
+            name: STS
+            type: STS
+          metrics:
+            - name: Average spearman
+              type: spearman
+              value: 7.886666666666667
+        - task:
+            type: text-generation
+          dataset:
+            name: STS
+            type: STS
+          metrics:
+            - name: Average pearson
+              type: pearson
+              value: 7.976666666666667
+        - task:
+            type: text-generation
+          dataset:
+            name: STS_finetuned
+            type: STS_finetuned
+          metrics:
+            - name: Average spearman
+              type: spearman
+              value: 71.749068976055
+        - task:
+            type: text-generation
+          dataset:
+            name: STS_finetuned
+            type: STS_finetuned
+          metrics:
+            - name: Average pearson
+              type: pearson
+              value: 71.9866084841066
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_arc_challenge
+            type: OpenLLM-Ro/ro_arc_challenge
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 35.56
+            - name: 1-shot
+              type: accuracy
+              value: 36.42
+            - name: 3-shot
+              type: accuracy
+              value: 38.56
+            - name: 5-shot
+              type: accuracy
+              value: 38.39
+            - name: 10-shot
+              type: accuracy
+              value: 39.07
+            - name: 25-shot
+              type: accuracy
+              value: 39.67
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_mmlu
+            type: OpenLLM-Ro/ro_mmlu
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 25.82
+            - name: 1-shot
+              type: accuracy
+              value: 25.48
+            - name: 3-shot
+              type: accuracy
+              value: 27.61
+            - name: 5-shot
+              type: accuracy
+              value: 29.96
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_winogrande
+            type: OpenLLM-Ro/ro_winogrande
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 58.72
+            - name: 1-shot
+              type: accuracy
+              value: 58.88
+            - name: 3-shot
+              type: accuracy
+              value: 60.38
+            - name: 5-shot
+              type: accuracy
+              value: 59.19
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_hellaswag
+            type: OpenLLM-Ro/ro_hellaswag
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 55.85
+            - name: 1-shot
+              type: accuracy
+              value: 57.06
+            - name: 3-shot
+              type: accuracy
+              value: 57.52
+            - name: 5-shot
+              type: accuracy
+              value: 57.89
+            - name: 10-shot
+              type: accuracy
+              value: 57.79
+        - task:
+            type: text-generation
+          dataset:
+            name: OpenLLM-Ro/ro_gsm8k
+            type: OpenLLM-Ro/ro_gsm8k
+          metrics:
+            - name: 0-shot
+              type: accuracy
+              value: 0.0
+            - name: 1-shot
+              type: accuracy
+              value: 2.96
+            - name: 3-shot
+              type: accuracy
+              value: 4.62
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_binary
+            type: LaRoSeDa_binary
+          metrics:
+            - name: 0-shot
+              type: macro-f1
+              value: 42.78
+            - name: 1-shot
+              type: macro-f1
+              value: 98.0
+            - name: 3-shot
+              type: macro-f1
+              value: 95.13
+            - name: 5-shot
+              type: macro-f1
+              value: 97.07
+        - task:
+            type: text-generation
+          dataset:
+            name: LaRoSeDa_multiclass
+            type: LaRoSeDa_multiclass
+          metrics:
+            - name: 0-shot
+              type: macro-f1
+              value: 46.41
+            - name: 1-shot
+              type: macro-f1
+              value: 67.36
+            - name: 3-shot
+              type: macro-f1
+              value: 65.16
+            - name: 5-shot
+              type: macro-f1
+              value: 65.23
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_EN-RO
+            type: WMT_EN-RO
+          metrics:
+            - name: 0-shot
+              type: bleu
+              value: 4.45
+            - name: 1-shot
+              type: bleu
+              value: 8.61
+            - name: 3-shot
+              type: bleu
+              value: 12.25
+            - name: 5-shot
+              type: bleu
+              value: 14.73
+        - task:
+            type: text-generation
+          dataset:
+            name: WMT_RO-EN
+            type: WMT_RO-EN
+          metrics:
+            - name: 0-shot
+              type: bleu
+              value: 1.29
+            - name: 1-shot
+              type: bleu
+              value: 10.78
+            - name: 3-shot
+              type: bleu
+              value: 16.82
+            - name: 5-shot
+              type: bleu
+              value: 23.24
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD_EM
+            type: XQuAD_EM
+          metrics:
+            - name: 0-shot
+              type: exact_match
+              value: 5.2941
+            - name: 1-shot
+              type: exact_match
+              value: 33.9496
+            - name: 3-shot
+              type: exact_match
+              value: 39.2437
+            - name: 5-shot
+              type: exact_match
+              value: 42.1008
+        - task:
+            type: text-generation
+          dataset:
+            name: XQuAD_F1
+            type: XQuAD_F1
+          metrics:
+            - name: 0-shot
+              type: f1
+              value: 16.1686
+            - name: 1-shot
+              type: f1
+              value: 51.8437
+            - name: 3-shot
+              type: f1
+              value: 58.8226
+            - name: 5-shot
+              type: f1
+              value: 61.2867
+        - task:
+            type: text-generation
+          dataset:
+            name: STS
+            type: STS
+          metrics:
+            - name: 0-shot
+              type: spearman
+              value: -1.74
+            - name: 1-shot
+              type: spearman
+              value: 15.47
+            - name: 3-shot
+              type: spearman
+              value: 9.93
+        - task:
+            type: text-generation
+          dataset:
+            name: STS
+            type: STS
+          metrics:
+            - name: 0-shot
+              type: pearson
+              value: -1.4
+            - name: 1-shot
+              type: pearson
+              value: 15.0
+            - name: 3-shot
+              type: pearson
+              value: 10.33
 ---
 # Model Card for Model ID