task,metric,value,err,version anli_r1,acc,0.317,0.014721675438880227,0 anli_r2,acc,0.33,0.014876872027456736,0 anli_r3,acc,0.34,0.013680495725767796,0 arc_challenge,acc,0.30204778156996587,0.013417519144716422,0 arc_challenge,acc_norm,0.3302047781569966,0.013743085603760431,0 arc_easy,acc,0.6313131313131313,0.009899640855681038,0 arc_easy,acc_norm,0.6077441077441077,0.010018744689650043,0 boolq,acc,0.5834862385321101,0.008622288020674008,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.3299319727891156,,1 copa,acc,0.72,0.04512608598542127,0 hellaswag,acc,0.45289782911770565,0.0049675912675574,0 hellaswag,acc_norm,0.6038637721569409,0.004880937933163293,0 piqa,acc,0.7442872687704026,0.010178690109459862,0 piqa,acc_norm,0.7524483133841132,0.010069703966857114,0 rte,acc,0.49458483754512633,0.030094698123239966,0 sciq,acc,0.908,0.009144376393151103,0 sciq,acc_norm,0.914,0.008870325962594766,0 storycloze_2016,acc,0.7049706039551042,0.010546232606962287,0 winogrande,acc,0.5824782951854776,0.01385997826444025,0