beomi committed on
Commit
47403e4
1 Parent(s): 2beaa28

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +65 -69
README.md CHANGED
@@ -85,75 +85,71 @@ Apache 2.0
85
  - Used EleutherAI's [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness)
86
  - 5-shot scores
87
 
88
- | Tasks |Version|Filter|n-shot| Metric |Value | |Stderr|
89
- |----------------------------------------------------------|-------|------|-----:|-----------|-----:|---|------|
90
- |haerae |N/A |none | 5|acc_norm |0.7874|± |0.0118|
91
- | | |none | 5|acc |0.7874|± |0.0118|
92
- | - haerae_general_knowledge | 1|none | 5|acc |0.5000|± |0.0378|
93
- | | |none | 5|acc_norm |0.5000|± |0.0378|
94
- | - haerae_history | 1|none | 5|acc |0.8723|± |0.0244|
95
- | | |none | 5|acc_norm |0.8723|± |0.0244|
96
- | - haerae_loan_word | 1|none | 5|acc |0.8402|± |0.0283|
97
- | | |none | 5|acc_norm |0.8402|± |0.0283|
98
- | - haerae_rare_word | 1|none | 5|acc |0.8346|± |0.0185|
99
- | | |none | 5|acc_norm |0.8346|± |0.0185|
100
- | - haerae_standard_nomenclature | 1|none | 5|acc |0.8301|± |0.0305|
101
- | | |none | 5|acc_norm |0.8301|± |0.0305|
102
- |kmmlu_direct |N/A |none | 5|exact_match|0.4205|± |0.0026|
103
- | - kmmlu_direct_accounting | 2|none | 5|exact_match|0.3700|± |0.0485|
104
- | - kmmlu_direct_agricultural_sciences | 2|none | 5|exact_match|0.3140|± |0.0147|
105
- | - kmmlu_direct_aviation_engineering_and_maintenance | 2|none | 5|exact_match|0.3870|± |0.0154|
106
- | - kmmlu_direct_biology | 2|none | 5|exact_match|0.3510|± |0.0151|
107
- | - kmmlu_direct_chemical_engineering | 2|none | 5|exact_match|0.3910|± |0.0154|
108
- | - kmmlu_direct_chemistry | 2|none | 5|exact_match|0.4000|± |0.0200|
109
- | - kmmlu_direct_civil_engineering | 2|none | 5|exact_match|0.4010|± |0.0155|
110
- | - kmmlu_direct_computer_science | 2|none | 5|exact_match|0.6520|± |0.0151|
111
- | - kmmlu_direct_construction | 2|none | 5|exact_match|0.3080|± |0.0146|
112
- | - kmmlu_direct_criminal_law | 2|none | 5|exact_match|0.3100|± |0.0328|
113
- | - kmmlu_direct_ecology | 2|none | 5|exact_match|0.4660|± |0.0158|
114
- | - kmmlu_direct_economics | 2|none | 5|exact_match|0.5385|± |0.0439|
115
- | - kmmlu_direct_education | 2|none | 5|exact_match|0.6200|± |0.0488|
116
- | - kmmlu_direct_electrical_engineering | 2|none | 5|exact_match|0.3000|± |0.0145|
117
- | - kmmlu_direct_electronics_engineering | 2|none | 5|exact_match|0.4740|± |0.0158|
118
- | - kmmlu_direct_energy_management | 2|none | 5|exact_match|0.3560|± |0.0151|
119
- | - kmmlu_direct_environmental_science | 2|none | 5|exact_match|0.2980|± |0.0145|
120
- | - kmmlu_direct_fashion | 2|none | 5|exact_match|0.4470|± |0.0157|
121
- | - kmmlu_direct_food_processing | 2|none | 5|exact_match|0.3690|± |0.0153|
122
- | - kmmlu_direct_gas_technology_and_engineering | 2|none | 5|exact_match|0.3000|± |0.0145|
123
- | - kmmlu_direct_geomatics | 2|none | 5|exact_match|0.3820|± |0.0154|
124
- | - kmmlu_direct_health | 2|none | 5|exact_match|0.5700|± |0.0498|
125
- | - kmmlu_direct_industrial_engineer | 2|none | 5|exact_match|0.3830|± |0.0154|
126
- | - kmmlu_direct_information_technology | 2|none | 5|exact_match|0.6090|± |0.0154|
127
- | - kmmlu_direct_interior_architecture_and_design | 2|none | 5|exact_match|0.5440|± |0.0158|
128
- | - kmmlu_direct_korean_history | 2|none | 5|exact_match|0.3800|± |0.0488|
129
- | - kmmlu_direct_law | 2|none | 5|exact_match|0.4670|± |0.0158|
130
- | - kmmlu_direct_machine_design_and_manufacturing | 2|none | 5|exact_match|0.3960|± |0.0155|
131
- | - kmmlu_direct_management | 2|none | 5|exact_match|0.5030|± |0.0158|
132
- | - kmmlu_direct_maritime_engineering | 2|none | 5|exact_match|0.4283|± |0.0202|
133
- | - kmmlu_direct_marketing | 2|none | 5|exact_match|0.7460|± |0.0138|
134
- | - kmmlu_direct_materials_engineering | 2|none | 5|exact_match|0.4020|± |0.0155|
135
- | - kmmlu_direct_math | 2|none | 5|exact_match|0.2867|± |0.0262|
136
- | - kmmlu_direct_mechanical_engineering | 2|none | 5|exact_match|0.3490|± |0.0151|
137
- | - kmmlu_direct_nondestructive_testing | 2|none | 5|exact_match|0.3760|± |0.0153|
138
- | - kmmlu_direct_patent | 2|none | 5|exact_match|0.3700|± |0.0485|
139
- | - kmmlu_direct_political_science_and_sociology | 2|none | 5|exact_match|0.5300|± |0.0289|
140
- | - kmmlu_direct_psychology | 2|none | 5|exact_match|0.4470|± |0.0157|
141
- | - kmmlu_direct_public_safety | 2|none | 5|exact_match|0.3520|± |0.0151|
142
- | - kmmlu_direct_railway_and_automotive_engineering | 2|none | 5|exact_match|0.3220|± |0.0148|
143
- | - kmmlu_direct_real_estate | 2|none | 5|exact_match|0.4350|± |0.0351|
144
- | - kmmlu_direct_refrigerating_machinery | 2|none | 5|exact_match|0.3240|± |0.0148|
145
- | - kmmlu_direct_social_welfare | 2|none | 5|exact_match|0.4970|± |0.0158|
146
- | - kmmlu_direct_taxation | 2|none | 5|exact_match|0.3800|± |0.0344|
147
- | - kmmlu_direct_telecommunications_and_wireless_technology| 2|none | 5|exact_match|0.5480|± |0.0157|
148
- |kobest_boolq | 1|none | 5|acc |0.9202|± |0.0072|
149
- | | |none | 5|f1 |0.9202|± |N/A |
150
- |kobest_copa | 1|none | 5|acc |0.8680|± |0.0107|
151
- | | |none | 5|f1 |0.8678|± |N/A |
152
- |kobest_hellaswag | 1|none | 5|acc |0.5560|± |0.0222|
153
- | | |none | 5|f1 |0.5520|± |N/A |
154
- | | |none | 5|acc_norm |0.6540|± |0.0213|
155
- |kobest_sentineg | 1|none | 5|acc |0.9824|± |0.0066|
156
- | | |none | 5|f1 |0.9824|± |N/A |
157
 
158
  ## Citation
159
 
 
85
  - Used EleutherAI's [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness)
86
  - 5-shot scores
87
 
88
+ | Tasks | Metric | Value | | Stderr |
89
+ |----------------------------------------------------------|-----------|--------:|---|--------:|
90
+ |haerae |acc_norm | 0.7874 |± | 0.0118 |
91
+ | - haerae_general_knowledge |acc | 0.5000 |± | 0.0378 |
92
+ | - haerae_history |acc | 0.8723 |± | 0.0244 |
93
+ | - haerae_loan_word |acc | 0.8402 |± | 0.0283 |
94
+ | - haerae_rare_word |acc | 0.8346 |± | 0.0185 |
95
+ | - haerae_standard_nomenclature |acc | 0.8301 |± | 0.0305 |
96
+ |kmmlu_direct |exact_match| 0.4205 |± | 0.0026 |
97
+ | - kmmlu_direct_accounting |exact_match| 0.3700 |± | 0.0485 |
98
+ | - kmmlu_direct_agricultural_sciences |exact_match| 0.3140 |± | 0.0147 |
99
+ | - kmmlu_direct_aviation_engineering_and_maintenance |exact_match| 0.3870 |± | 0.0154 |
100
+ | - kmmlu_direct_biology |exact_match| 0.3510 |± | 0.0151 |
101
+ | - kmmlu_direct_chemical_engineering |exact_match| 0.3910 |± | 0.0154 |
102
+ | - kmmlu_direct_chemistry |exact_match| 0.4000 |± | 0.0200 |
103
+ | - kmmlu_direct_civil_engineering |exact_match| 0.4010 |± | 0.0155 |
104
+ | - kmmlu_direct_computer_science |exact_match| 0.6520 |± | 0.0151 |
105
+ | - kmmlu_direct_construction |exact_match| 0.3080 |± | 0.0146 |
106
+ | - kmmlu_direct_criminal_law |exact_match| 0.3100 |± | 0.0328 |
107
+ | - kmmlu_direct_ecology |exact_match| 0.4660 |± | 0.0158 |
108
+ | - kmmlu_direct_economics |exact_match| 0.5385 |± | 0.0439 |
109
+ | - kmmlu_direct_education |exact_match| 0.6200 |± | 0.0488 |
110
+ | - kmmlu_direct_electrical_engineering |exact_match| 0.3000 |± | 0.0145 |
111
+ | - kmmlu_direct_electronics_engineering |exact_match| 0.4740 |± | 0.0158 |
112
+ | - kmmlu_direct_energy_management |exact_match| 0.3560 |± | 0.0151 |
113
+ | - kmmlu_direct_environmental_science |exact_match| 0.2980 |± | 0.0145 |
114
+ | - kmmlu_direct_fashion |exact_match| 0.4470 |± | 0.0157 |
115
+ | - kmmlu_direct_food_processing |exact_match| 0.3690 |± | 0.0153 |
116
+ | - kmmlu_direct_gas_technology_and_engineering |exact_match| 0.3000 |± | 0.0145 |
117
+ | - kmmlu_direct_geomatics |exact_match| 0.3820 |± | 0.0154 |
118
+ | - kmmlu_direct_health |exact_match| 0.5700 |± | 0.0498 |
119
+ | - kmmlu_direct_industrial_engineer |exact_match| 0.3830 |± | 0.0154 |
120
+ | - kmmlu_direct_information_technology |exact_match| 0.6090 |± | 0.0154 |
121
+ | - kmmlu_direct_interior_architecture_and_design |exact_match| 0.5440 |± | 0.0158 |
122
+ | - kmmlu_direct_korean_history |exact_match| 0.3800 |± | 0.0488 |
123
+ | - kmmlu_direct_law |exact_match| 0.4670 |± | 0.0158 |
124
+ | - kmmlu_direct_machine_design_and_manufacturing |exact_match| 0.3960 |± | 0.0155 |
125
+ | - kmmlu_direct_management |exact_match| 0.5030 |± | 0.0158 |
126
+ | - kmmlu_direct_maritime_engineering |exact_match| 0.4283 |± | 0.0202 |
127
+ | - kmmlu_direct_marketing |exact_match| 0.7460 |± | 0.0138 |
128
+ | - kmmlu_direct_materials_engineering |exact_match| 0.4020 |± | 0.0155 |
129
+ | - kmmlu_direct_math |exact_match| 0.2867 |± | 0.0262 |
130
+ | - kmmlu_direct_mechanical_engineering |exact_match| 0.3490 |± | 0.0151 |
131
+ | - kmmlu_direct_nondestructive_testing |exact_match| 0.3760 |± | 0.0153 |
132
+ | - kmmlu_direct_patent |exact_match| 0.3700 |± | 0.0485 |
133
+ | - kmmlu_direct_political_science_and_sociology |exact_match| 0.5300 |± | 0.0289 |
134
+ | - kmmlu_direct_psychology |exact_match| 0.4470 |± | 0.0157 |
135
+ | - kmmlu_direct_public_safety |exact_match| 0.3520 |± | 0.0151 |
136
+ | - kmmlu_direct_railway_and_automotive_engineering |exact_match| 0.3220 |± | 0.0148 |
137
+ | - kmmlu_direct_real_estate |exact_match| 0.4350 |± | 0.0351 |
138
+ | - kmmlu_direct_refrigerating_machinery |exact_match| 0.3240 |± | 0.0148 |
139
+ | - kmmlu_direct_social_welfare |exact_match| 0.4970 |± | 0.0158 |
140
+ | - kmmlu_direct_taxation |exact_match| 0.3800 |± | 0.0344 |
141
+ | - kmmlu_direct_telecommunications_and_wireless_technology|exact_match| 0.5480 |± | 0.0157 |
142
+ |kobest_boolq |acc | 0.9202 |± | 0.0072 |
143
+ | |f1 | 0.9202 |± |N/A |
144
+ |kobest_copa |acc | 0.8680 |± | 0.0107 |
145
+ | |f1 | 0.8678 |± |N/A |
146
+ |kobest_hellaswag |acc | 0.5560 |± | 0.0222 |
147
+ | |f1 | 0.5520 |± |N/A |
148
+ | |acc_norm | 0.6540 |± | 0.0213 |
149
+ |kobest_sentineg |acc | 0.9824 |± | 0.0066 |
150
+ | |f1 | 0.9824 |± |N/A |
151
+
152
+
 
 
 
 
153
 
154
  ## Citation
155