task,metric,value,err,version anli_r1,acc,0.348,0.015070604603768408,0 anli_r2,acc,0.36,0.01518652793204012,0 anli_r3,acc,0.35083333333333333,0.013782212417178195,0 arc_challenge,acc,0.2901023890784983,0.013261573677520769,0 arc_challenge,acc_norm,0.31143344709897613,0.013532472099850949,0 arc_easy,acc,0.6266835016835017,0.009925009142802903,0 arc_easy,acc_norm,0.6203703703703703,0.009958037725468558,0 boolq,acc,0.5498470948012233,0.008701488203356937,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.40387403446226977,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4567815176259709,0.004971106265046551,0 hellaswag,acc_norm,0.5992830113523202,0.004890422457747258,0 piqa,acc,0.7578890097932536,0.009994371269104387,0 piqa,acc_norm,0.7682263329706203,0.00984514377279405,0 rte,acc,0.48375451263537905,0.030080573208738064,0 sciq,acc,0.923,0.008434580140240632,0 sciq,acc_norm,0.903,0.00936368937324812,0 storycloze_2016,acc,0.7247461250668092,0.010328538400500567,0 winogrande,acc,0.569060773480663,0.013917796623335966,0