task,metric,value,err,version anli_r1,acc,0.308,0.014606483127342763,0 anli_r2,acc,0.323,0.014794927843348639,0 anli_r3,acc,0.3441666666666667,0.013720551062295755,0 arc_challenge,acc,0.29692832764505117,0.013352025976725223,0 arc_challenge,acc_norm,0.32081911262798635,0.013640943091946531,0 arc_easy,acc,0.6191077441077442,0.009964428212260372,0 arc_easy,acc_norm,0.5858585858585859,0.010107387673002528,0 boolq,acc,0.5577981651376147,0.008686430526114496,1 cb,acc,0.30357142857142855,0.06199938655510754,1 cb,f1,0.262831508114527,,1 copa,acc,0.71,0.045604802157206845,0 hellaswag,acc,0.3874726150169289,0.0048617741296124945,0 hellaswag,acc_norm,0.47759410476000796,0.004984768912326942,0 piqa,acc,0.7584330794341676,0.009986718001804463,0 piqa,acc_norm,0.766050054406964,0.009877236895137432,0 rte,acc,0.51985559566787,0.030072723167317184,0 sciq,acc,0.833,0.01180043432464459,0 sciq,acc_norm,0.8,0.012655439943366665,0 storycloze_2016,acc,0.6515232495991449,0.01101871778478849,0 winogrande,acc,0.5564325177584846,0.0139626949076204,0