|
task,metric,value,err,version
|
|
anli_r1,acc,0.325,0.014818724459095524,0
|
|
anli_r2,acc,0.336,0.014944140233795021,0
|
|
anli_r3,acc,0.3233333333333333,0.013508372867300212,0
|
|
arc_challenge,acc,0.30119453924914674,0.013406741767847624,0
|
|
arc_challenge,acc_norm,0.32337883959044367,0.01366942163001213,0
|
|
arc_easy,acc,0.6372053872053872,0.009865936757013942,0
|
|
arc_easy,acc_norm,0.6186868686868687,0.009966542497171021,0
|
|
boolq,acc,0.6241590214067279,0.008471147248160107,1
|
|
cb,acc,0.39285714285714285,0.0658538889806635,1
|
|
cb,f1,0.3565868967138097,,1
|
|
copa,acc,0.81,0.03942772444036623,0
|
|
hellaswag,acc,0.4790878311093408,0.004985415250690914,0
|
|
hellaswag,acc_norm,0.634833698466441,0.004804927608773137,0
|
|
piqa,acc,0.7540805223068553,0.01004733186562519,0
|
|
piqa,acc_norm,0.7687704026115343,0.009837063180625334,0
|
|
rte,acc,0.6064981949458483,0.029405839314203194,0
|
|
sciq,acc,0.91,0.00905439020486644,0
|
|
sciq,acc_norm,0.897,0.009616833339695796,0
|
|
storycloze_2016,acc,0.7295563869588455,0.010271810373331027,0
|
|
winogrande,acc,0.585635359116022,0.013844846232268563,0
|
|
|