task,metric,value,err,version anli_r1,acc,0.32,0.014758652303574876,0 anli_r2,acc,0.334,0.014922019523732963,0 anli_r3,acc,0.32916666666666666,0.013570806258433633,0 arc_challenge,acc,0.15870307167235495,0.010677974278076945,0 arc_challenge,acc_norm,0.2158703071672355,0.012022975360030665,0 arc_easy,acc,0.37415824915824913,0.009929516948977625,0 arc_easy,acc_norm,0.3707912457912458,0.009911292822056923,0 boolq,acc,0.5944954128440367,0.008587459055441612,1 cb,acc,0.35714285714285715,0.0646095738380922,1 cb,f1,0.24356089022255564,,1 copa,acc,0.62,0.048783173121456316,0 hellaswag,acc,0.26837283409679347,0.004422070927212532,0 hellaswag,acc_norm,0.27982473610834496,0.004479955169853626,0 piqa,acc,0.5843307943416758,0.011498699770894783,0 piqa,acc_norm,0.5897714907508161,0.011476256036359114,0 rte,acc,0.51985559566787,0.030072723167317184,0 sciq,acc,0.736,0.013946271849440472,0 sciq,acc_norm,0.691,0.014619600977206486,0 storycloze_2016,acc,0.5462319615179049,0.01151289919986303,0 winogrande,acc,0.5169692186266772,0.014044390401612981,0