|
task,metric,value,err,version
|
|
anli_r1,acc,0.296,0.014442734941575018,0
|
|
anli_r2,acc,0.355,0.01513949154378053,0
|
|
anli_r3,acc,0.3233333333333333,0.013508372867300222,0
|
|
arc_challenge,acc,0.3037542662116041,0.013438909184778755,0
|
|
arc_challenge,acc_norm,0.34215017064846415,0.013864152159177278,0
|
|
arc_easy,acc,0.6384680134680135,0.009858506543162063,0
|
|
arc_easy,acc_norm,0.6473063973063973,0.009804420599378657,0
|
|
boolq,acc,0.5724770642201835,0.008652692997177339,1
|
|
cb,acc,0.39285714285714285,0.0658538889806635,1
|
|
cb,f1,0.28226120857699805,,1
|
|
copa,acc,0.77,0.04229525846816506,0
|
|
hellaswag,acc,0.45737900816570404,0.004971619995879763,0
|
|
hellaswag,acc_norm,0.6022704640509858,0.004884287515461492,0
|
|
piqa,acc,0.7442872687704026,0.010178690109459857,0
|
|
piqa,acc_norm,0.7546245919477693,0.010039831320422386,0
|
|
rte,acc,0.47653429602888087,0.030063300411902652,0
|
|
sciq,acc,0.923,0.008434580140240637,0
|
|
sciq,acc_norm,0.926,0.008282064512704159,0
|
|
storycloze_2016,acc,0.6996258685195083,0.010600915927985021,0
|
|
winogrande,acc,0.5698500394632992,0.013914685094716692,0
|
|
|