|
task,metric,value,err,version
|
|
anli_r1,acc,0.337,0.014955087918653616,0
|
|
anli_r2,acc,0.328,0.014853842487270334,0
|
|
anli_r3,acc,0.33916666666666667,0.013672343491681822,0
|
|
arc_challenge,acc,0.2696245733788396,0.012968040686869143,0
|
|
arc_challenge,acc_norm,0.28924914675767915,0.013250012579393443,0
|
|
arc_easy,acc,0.57996632996633,0.010127718838529321,0
|
|
arc_easy,acc_norm,0.5681818181818182,0.010163945352271733,0
|
|
boolq,acc,0.5804281345565749,0.008631175489166726,1
|
|
cb,acc,0.32142857142857145,0.06297362289056341,1
|
|
cb,f1,0.24285714285714288,,1
|
|
copa,acc,0.74,0.04408440022768078,0
|
|
hellaswag,acc,0.4298944433379805,0.004940490508240647,0
|
|
hellaswag,acc_norm,0.5665206134236208,0.004945424771611602,0
|
|
piqa,acc,0.7334058759521219,0.010316749863541367,0
|
|
piqa,acc_norm,0.7486398258977149,0.010121156016819245,0
|
|
rte,acc,0.4657039711191336,0.030025579819366426,0
|
|
sciq,acc,0.891,0.00985982840703719,0
|
|
sciq,acc_norm,0.882,0.01020686926438179,0
|
|
storycloze_2016,acc,0.6910742918225548,0.010684853966268455,0
|
|
winogrande,acc,0.5461720599842147,0.01399244156370706,0
|
|
|