task,metric,value,err,version anli_r1,acc,0.327,0.014842213153411247,0 anli_r2,acc,0.333,0.01491084616422986,0 anli_r3,acc,0.3408333333333333,0.01368860079329693,0 arc_challenge,acc,0.295221843003413,0.013329750293382318,0 arc_challenge,acc_norm,0.32337883959044367,0.013669421630012129,0 arc_easy,acc,0.622895622895623,0.009945041946366499,0 arc_easy,acc_norm,0.6018518518518519,0.010044662374653398,0 boolq,acc,0.5920489296636086,0.008595583792654892,1 cb,acc,0.5357142857142857,0.06724777654937658,1 cb,f1,0.3829365079365079,,1 copa,acc,0.78,0.04163331998932262,0 hellaswag,acc,0.48048197570205137,0.00498597821493792,0 hellaswag,acc_norm,0.6397132045409281,0.004791024004587989,0 piqa,acc,0.7578890097932536,0.009994371269104376,0 piqa,acc_norm,0.7682263329706203,0.009845143772794043,0 rte,acc,0.5090252707581228,0.030091559826331334,0 sciq,acc,0.903,0.009363689373248092,0 sciq,acc_norm,0.882,0.010206869264381791,0 storycloze_2016,acc,0.7161945483698557,0.01042569627973092,0 winogrande,acc,0.6053670086819258,0.013736915172371883,0