|
task,metric,value,err,version
|
|
anli_r1,acc,0.332,0.014899597242811483,0
|
|
anli_r2,acc,0.336,0.014944140233795027,0
|
|
anli_r3,acc,0.3433333333333333,0.01371263383046586,0
|
|
arc_challenge,acc,0.2832764505119454,0.013167478735134575,0
|
|
arc_challenge,acc_norm,0.30204778156996587,0.013417519144716417,0
|
|
arc_easy,acc,0.6047979797979798,0.010031894052790973,0
|
|
arc_easy,acc_norm,0.5526094276094277,0.01020283238541565,0
|
|
boolq,acc,0.617125382262997,0.008501734385335951,1
|
|
cb,acc,0.39285714285714285,0.0658538889806635,1
|
|
cb,f1,0.18803418803418803,,1
|
|
copa,acc,0.73,0.044619604333847394,0
|
|
hellaswag,acc,0.45140410276837284,0.004966158142645415,0
|
|
hellaswag,acc_norm,0.5865365465046803,0.004914480534533721,0
|
|
piqa,acc,0.7486398258977149,0.010121156016819259,0
|
|
piqa,acc_norm,0.7519042437431991,0.010077118315574706,0
|
|
rte,acc,0.5415162454873647,0.029992535385373314,0
|
|
sciq,acc,0.859,0.011010914595992446,0
|
|
sciq,acc_norm,0.792,0.012841374572096928,0
|
|
storycloze_2016,acc,0.7017637626937466,0.01057924979557881,0
|
|
winogrande,acc,0.5737963693764798,0.013898585965412342,0
|
|
|