task,metric,value,err,version anli_r1,acc,0.325,0.014818724459095524,0 anli_r2,acc,0.336,0.014944140233795021,0 anli_r3,acc,0.3233333333333333,0.013508372867300212,0 arc_challenge,acc,0.30119453924914674,0.013406741767847624,0 arc_challenge,acc_norm,0.32337883959044367,0.01366942163001213,0 arc_easy,acc,0.6372053872053872,0.009865936757013942,0 arc_easy,acc_norm,0.6186868686868687,0.009966542497171021,0 boolq,acc,0.6241590214067279,0.008471147248160107,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.3565868967138097,,1 copa,acc,0.81,0.03942772444036623,0 hellaswag,acc,0.4790878311093408,0.004985415250690914,0 hellaswag,acc_norm,0.634833698466441,0.004804927608773137,0 piqa,acc,0.7540805223068553,0.01004733186562519,0 piqa,acc_norm,0.7687704026115343,0.009837063180625334,0 rte,acc,0.6064981949458483,0.029405839314203194,0 sciq,acc,0.91,0.00905439020486644,0 sciq,acc_norm,0.897,0.009616833339695796,0 storycloze_2016,acc,0.7295563869588455,0.010271810373331027,0 winogrande,acc,0.585635359116022,0.013844846232268563,0