|
task,metric,value,err,version
|
|
anli_r1,acc,0.36,0.015186527932040117,0
|
|
anli_r2,acc,0.347,0.015060472031706625,0
|
|
anli_r3,acc,0.3625,0.01388303787422552,0
|
|
arc_challenge,acc,0.302901023890785,0.013428241573185349,0
|
|
arc_challenge,acc_norm,0.32337883959044367,0.013669421630012129,0
|
|
arc_easy,acc,0.640993265993266,0.009843424713072174,0
|
|
arc_easy,acc_norm,0.6186868686868687,0.009966542497171025,0
|
|
boolq,acc,0.545565749235474,0.008708665643758015,1
|
|
cb,acc,0.5535714285714286,0.06703189227942395,1
|
|
cb,f1,0.4538378958668814,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.45180242979486157,0.004966544724452227,0
|
|
hellaswag,acc_norm,0.5955984863572994,0.004897728370737246,0
|
|
piqa,acc,0.7578890097932536,0.009994371269104385,0
|
|
piqa,acc_norm,0.7752992383025027,0.009738282586548389,0
|
|
rte,acc,0.48375451263537905,0.030080573208738064,0
|
|
sciq,acc,0.915,0.008823426366942331,0
|
|
sciq,acc_norm,0.912,0.008963053962592085,0
|
|
storycloze_2016,acc,0.7177979690005345,0.010407834479647672,0
|
|
winogrande,acc,0.5706393054459353,0.013911537499969163,0
|
|
|