task,metric,value,err,version anli_r1,acc,0.36,0.015186527932040126,0 anli_r2,acc,0.354,0.015129868238451773,0 anli_r3,acc,0.3625,0.013883037874225516,0 arc_challenge,acc,0.25853242320819114,0.012794553754288682,0 arc_challenge,acc_norm,0.2696245733788396,0.012968040686869147,0 arc_easy,acc,0.4877946127946128,0.010256726235129018,0 arc_easy,acc_norm,0.4713804713804714,0.010242962617927192,0 boolq,acc,0.5324159021406728,0.008726657178723137,1 cb,acc,0.5178571428571429,0.06737697508644647,1 cb,f1,0.25430894308943086,,1 copa,acc,0.71,0.04560480215720683,0 hellaswag,acc,0.40400318661621193,0.004896952378506924,0 hellaswag,acc_norm,0.5022903804023103,0.004989729059957428,0 piqa,acc,0.7067464635473341,0.010621818421101928,0 piqa,acc_norm,0.70620239390642,0.01062757408051481,0 rte,acc,0.48014440433212996,0.0300727231673172,0 sciq,acc,0.766,0.01339490288966001,0 sciq,acc_norm,0.734,0.01397996564514515,0 storycloze_2016,acc,0.6563335114911811,0.010982724236255948,0 winogrande,acc,0.5224940805051302,0.014038257824059874,0