task,metric,value,err,version anli_r1,acc,0.322,0.014782913600996652,0 anli_r2,acc,0.361,0.015195720118175113,0 anli_r3,acc,0.33916666666666667,0.013672343491681815,0 arc_challenge,acc,0.24829351535836178,0.012624912868089758,0 arc_challenge,acc_norm,0.2815699658703072,0.013143376735009026,0 arc_easy,acc,0.494949494949495,0.01025926010256586,0 arc_easy,acc_norm,0.4730639730639731,0.010244884740620094,0 boolq,acc,0.5281345565749236,0.008731199646681927,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.21666666666666667,,1 copa,acc,0.72,0.04512608598542127,0 hellaswag,acc,0.4039036048595897,0.00489675785702255,0 hellaswag,acc_norm,0.5044811790479984,0.004989581008163205,0 piqa,acc,0.7067464635473341,0.010621818421101926,0 piqa,acc_norm,0.7165397170837867,0.010515057791152041,0 rte,acc,0.5054151624548736,0.030094698123239966,0 sciq,acc,0.753,0.013644675781314133,0 sciq,acc_norm,0.715,0.014282120955200471,0 storycloze_2016,acc,0.6547300908605024,0.010994860223187675,0 winogrande,acc,0.5303867403314917,0.014026510839428734,0