task,metric,value,err,version anli_r1,acc,0.337,0.014955087918653605,0 anli_r2,acc,0.327,0.014842213153411242,0 anli_r3,acc,0.3475,0.013751753243291854,0 arc_challenge,acc,0.24744027303754265,0.012610352663292673,0 arc_challenge,acc_norm,0.28924914675767915,0.013250012579393443,0 arc_easy,acc,0.5286195286195287,0.010242962617927195,0 arc_easy,acc_norm,0.4654882154882155,0.010235314238969397,0 boolq,acc,0.5223241590214067,0.00873633411558504,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.3011204481792717,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4069906393148775,0.004902690765066426,0 hellaswag,acc_norm,0.518621788488349,0.004986319587524956,0 piqa,acc,0.7143634385201306,0.010539303948661932,0 piqa,acc_norm,0.7241566920565833,0.01042780550272912,0 rte,acc,0.5487364620938628,0.029953149241808946,0 sciq,acc,0.75,0.013699915608779773,0 sciq,acc_norm,0.681,0.014746404865473493,0 storycloze_2016,acc,0.6440406199893105,0.01107225418438284,0 winogrande,acc,0.5524861878453039,0.013974847640536203,0