task,metric,value,err,version anli_r1,acc,0.332,0.014899597242811478,0 anli_r2,acc,0.329,0.014865395385928362,0 anli_r3,acc,0.34833333333333333,0.013759437498874075,0 arc_challenge,acc,0.26791808873720135,0.012942030195136437,0 arc_challenge,acc_norm,0.2883959044368601,0.013238394422428171,0 arc_easy,acc,0.6052188552188552,0.010030038935883584,0 arc_easy,acc_norm,0.5429292929292929,0.01022189756425604,0 boolq,acc,0.5623853211009174,0.008676717715731632,1 cb,acc,0.5714285714285714,0.06672848092813058,1 cb,f1,0.3888888888888889,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.469627564230233,0.004980566907790459,0 hellaswag,acc_norm,0.6134236207926708,0.004859699562451462,0 piqa,acc,0.7578890097932536,0.00999437126910438,0 piqa,acc_norm,0.7622415669205659,0.009932525779525492,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.852,0.011234866364235235,0 sciq,acc_norm,0.764,0.013434451402438678,0 storycloze_2016,acc,0.7108498129342598,0.010484068799942072,0 winogrande,acc,0.5737963693764798,0.013898585965412338,0