task,metric,value,err,version anli_r1,acc,0.345,0.015039986742055238,0 anli_r2,acc,0.339,0.014976758771620345,0 anli_r3,acc,0.3516666666666667,0.013789711695404789,0 arc_challenge,acc,0.29692832764505117,0.013352025976725222,0 arc_challenge,acc_norm,0.32764505119453924,0.013715847940719346,0 arc_easy,acc,0.6393097643097643,0.009853512108416734,0 arc_easy,acc_norm,0.6220538720538721,0.009949405744045481,0 boolq,acc,0.5700305810397553,0.008658853690729254,1 cb,acc,0.2857142857142857,0.060914490387317256,1 cb,f1,0.26703155274583845,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.3828918542123083,0.004850988215167541,0 hellaswag,acc_norm,0.48605855407289383,0.00498784136740252,0 piqa,acc,0.7546245919477693,0.010039831320422396,0 piqa,acc_norm,0.76550598476605,0.00988520314324054,0 rte,acc,0.516245487364621,0.030080573208738064,0 sciq,acc,0.853,0.011203415395160336,0 sciq,acc_norm,0.851,0.01126614068463217,0 storycloze_2016,acc,0.6622127204703367,0.010937034991003881,0 winogrande,acc,0.55327545382794,0.01397248837161669,0