task,metric,value,err,version anli_r1,acc,0.337,0.014955087918653607,0 anli_r2,acc,0.362,0.015204840912919498,0 anli_r3,acc,0.36583333333333334,0.013910212062701167,0 arc_challenge,acc,0.24146757679180889,0.012506564839739434,0 arc_challenge,acc_norm,0.26621160409556316,0.012915774781523214,0 arc_easy,acc,0.4903198653198653,0.010257860554461127,0 arc_easy,acc_norm,0.47474747474747475,0.010246690042583842,0 boolq,acc,0.5409785932721712,0.008715635308774413,1 cb,acc,0.5178571428571429,0.06737697508644647,1 cb,f1,0.27628205128205124,,1 copa,acc,0.67,0.04725815626252606,0 hellaswag,acc,0.40619398526190004,0.004901178917900842,0 hellaswag,acc_norm,0.5022903804023103,0.004989729059957431,0 piqa,acc,0.7072905331882481,0.010616044462393092,0 piqa,acc_norm,0.7072905331882481,0.010616044462393094,0 rte,acc,0.5090252707581228,0.030091559826331334,0 sciq,acc,0.761,0.013493000446937591,0 sciq,acc_norm,0.726,0.014111099288259588,0 storycloze_2016,acc,0.6606092998396579,0.010949682016358629,0 winogrande,acc,0.5280189423835833,0.01403040421340578,0