|
task,metric,value,err,version
|
|
anli_r1,acc,0.321,0.014770821817934661,0
|
|
anli_r2,acc,0.355,0.01513949154378053,0
|
|
anli_r3,acc,0.35083333333333333,0.013782212417178202,0
|
|
arc_challenge,acc,0.30716723549488056,0.013481034054980945,0
|
|
arc_challenge,acc_norm,0.3199658703071672,0.013631345807016195,0
|
|
arc_easy,acc,0.6300505050505051,0.009906656266021158,0
|
|
arc_easy,acc_norm,0.6317340067340067,0.009897286209010888,0
|
|
boolq,acc,0.5871559633027523,0.00861117243047287,1
|
|
cb,acc,0.42857142857142855,0.06672848092813058,1
|
|
cb,f1,0.41546499477533966,,1
|
|
copa,acc,0.72,0.04512608598542128,0
|
|
hellaswag,acc,0.4582752439753037,0.004972377085916326,0
|
|
hellaswag,acc_norm,0.6056562437761402,0.004877104939356237,0
|
|
piqa,acc,0.7448313384113167,0.01017157159252182,0
|
|
piqa,acc_norm,0.7546245919477693,0.010039831320422386,0
|
|
rte,acc,0.51985559566787,0.030072723167317184,0
|
|
sciq,acc,0.924,0.008384169266796401,0
|
|
sciq,acc_norm,0.93,0.008072494358323499,0
|
|
storycloze_2016,acc,0.709246392303581,0.010501233625213076,0
|
|
winogrande,acc,0.5895816890292028,0.013825107120035865,0
|
|
|