|
task,metric,value,err,version
|
|
anli_r1,acc,0.34,0.014987482264363937,0
|
|
anli_r2,acc,0.321,0.014770821817934644,0
|
|
anli_r3,acc,0.34,0.013680495725767803,0
|
|
arc_challenge,acc,0.29266211604095566,0.013295916103619417,0
|
|
arc_challenge,acc_norm,0.32337883959044367,0.013669421630012132,0
|
|
arc_easy,acc,0.6262626262626263,0.009927267058259628,0
|
|
arc_easy,acc_norm,0.5917508417508418,0.01008556619579125,0
|
|
boolq,acc,0.5948012232415902,0.008586427929715515,1
|
|
cb,acc,0.375,0.06527912098338669,1
|
|
cb,f1,0.32099491681373216,,1
|
|
copa,acc,0.77,0.04229525846816506,0
|
|
hellaswag,acc,0.48078072097191793,0.004986093791041653,0
|
|
hellaswag,acc_norm,0.6337382991435969,0.004807975515446487,0
|
|
piqa,acc,0.7622415669205659,0.009932525779525489,0
|
|
piqa,acc_norm,0.763873775843308,0.009908965890558218,0
|
|
rte,acc,0.5740072202166066,0.029764956741777645,0
|
|
sciq,acc,0.904,0.009320454434783227,0
|
|
sciq,acc_norm,0.885,0.01009340759490462,0
|
|
storycloze_2016,acc,0.7204703367183325,0.01037770209970486,0
|
|
winogrande,acc,0.590370955011839,0.013821049109655453,0
|
|
|