|
task,metric,value,err,version
|
|
anli_r1,acc,0.363,0.015213890444671281,0
|
|
anli_r2,acc,0.347,0.015060472031706624,0
|
|
anli_r3,acc,0.34,0.013680495725767794,0
|
|
arc_challenge,acc,0.2986348122866894,0.013374078615068756,0
|
|
arc_challenge,acc_norm,0.310580204778157,0.013522292098053052,0
|
|
arc_easy,acc,0.6447811447811448,0.009820245899287117,0
|
|
arc_easy,acc_norm,0.625,0.009933992677987828,0
|
|
boolq,acc,0.5376146788990825,0.008720273736433679,1
|
|
cb,acc,0.5535714285714286,0.06703189227942397,1
|
|
cb,f1,0.3974410235905637,,1
|
|
copa,acc,0.81,0.03942772444036623,0
|
|
hellaswag,acc,0.44981079466241786,0.004964579685712439,0
|
|
hellaswag,acc_norm,0.6002788289185421,0.004888398535520516,0
|
|
piqa,acc,0.7584330794341676,0.00998671800180446,0
|
|
piqa,acc_norm,0.7671381936887922,0.009861236071080757,0
|
|
rte,acc,0.49097472924187724,0.030091559826331334,0
|
|
sciq,acc,0.918,0.00868051561552374,0
|
|
sciq,acc_norm,0.908,0.009144376393151117,0
|
|
storycloze_2016,acc,0.7113842864778194,0.01047831178564294,0
|
|
winogrande,acc,0.5785319652722968,0.013878072377497603,0
|
|
|