task,metric,value,err,version anli_r1,acc,0.312,0.014658474370509005,0 anli_r2,acc,0.328,0.014853842487270336,0 anli_r3,acc,0.32666666666666666,0.013544340907003665,0 arc_challenge,acc,0.2832764505119454,0.013167478735134575,0 arc_challenge,acc_norm,0.29436860068259385,0.013318528460539422,0 arc_easy,acc,0.6094276094276094,0.01001105911206424,0 arc_easy,acc_norm,0.5631313131313131,0.010177672928157695,0 boolq,acc,0.5324159021406728,0.008726657178723137,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.4627446995868048,,1 copa,acc,0.71,0.04560480215720684,0 hellaswag,acc,0.3833897629954192,0.0048521826212742526,0 hellaswag,acc_norm,0.47769368651663013,0.00498481339101621,0 piqa,acc,0.750816104461371,0.010091882770120216,0 piqa,acc_norm,0.7584330794341676,0.009986718001804439,0 rte,acc,0.4657039711191336,0.030025579819366426,0 sciq,acc,0.84,0.011598902298689004,0 sciq,acc_norm,0.795,0.012772554096113118,0 storycloze_2016,acc,0.6456440406199893,0.011061031791615487,0 winogrande,acc,0.5706393054459353,0.01391153749996917,0