task,metric,value,err,version anli_r1,acc,0.343,0.015019206922356951,0 anli_r2,acc,0.356,0.015149042659306621,0 anli_r3,acc,0.3433333333333333,0.01371263383046586,0 arc_challenge,acc,0.23890784982935154,0.012461071376316614,0 arc_challenge,acc_norm,0.26535836177474403,0.012902554762313967,0 arc_easy,acc,0.49284511784511786,0.010258733022446367,0 arc_easy,acc_norm,0.4696969696969697,0.01024092360872654,0 boolq,acc,0.5333333333333333,0.008725599880049204,1 cb,acc,0.4107142857142857,0.06633634150359541,1 cb,f1,0.19658119658119658,,1 copa,acc,0.73,0.0446196043338474,0 hellaswag,acc,0.4047002589125672,0.004898308167211846,0 hellaswag,acc_norm,0.4994025094602669,0.004989777848791008,0 piqa,acc,0.7110990206746464,0.010575111841364905,0 piqa,acc_norm,0.7072905331882481,0.010616044462393094,0 rte,acc,0.5018050541516246,0.030096267148976633,0 sciq,acc,0.724,0.014142984975740668,0 sciq,acc_norm,0.688,0.014658474370509001,0 storycloze_2016,acc,0.6563335114911811,0.010982724236255945,0 winogrande,acc,0.5193370165745856,0.014041972733712974,0