task,metric,value,err,version anli_r1,acc,0.322,0.014782913600996666,0 anli_r2,acc,0.357,0.015158521721486774,0 anli_r3,acc,0.34833333333333333,0.013759437498874079,0 arc_challenge,acc,0.2901023890784983,0.01326157367752077,0 arc_challenge,acc_norm,0.3250853242320819,0.013688147309729124,0 arc_easy,acc,0.6321548821548821,0.009894923464455193,0 arc_easy,acc_norm,0.61489898989899,0.00998521479873725,0 boolq,acc,0.563914373088685,0.008673312776324932,1 cb,acc,0.32142857142857145,0.06297362289056341,1 cb,f1,0.2855772439105772,,1 copa,acc,0.77,0.042295258468165044,0 hellaswag,acc,0.3828918542123083,0.004850988215167546,0 hellaswag,acc_norm,0.4871539533957379,0.004988134303021793,0 piqa,acc,0.7600652883569097,0.009963625892809544,0 piqa,acc_norm,0.7687704026115343,0.009837063180625334,0 rte,acc,0.4729241877256318,0.030052303463143713,0 sciq,acc,0.85,0.0112972398234093,0 sciq,acc_norm,0.842,0.01153989467755957,0 storycloze_2016,acc,0.6702298236237306,0.010871682471395135,0 winogrande,acc,0.5722178374112076,0.013905134013839943,0