|
task,metric,value,err,version
|
|
anli_r1,acc,0.332,0.014899597242811478,0
|
|
anli_r2,acc,0.329,0.014865395385928362,0
|
|
anli_r3,acc,0.34833333333333333,0.013759437498874075,0
|
|
arc_challenge,acc,0.26791808873720135,0.012942030195136437,0
|
|
arc_challenge,acc_norm,0.2883959044368601,0.013238394422428171,0
|
|
arc_easy,acc,0.6052188552188552,0.010030038935883584,0
|
|
arc_easy,acc_norm,0.5429292929292929,0.01022189756425604,0
|
|
boolq,acc,0.5623853211009174,0.008676717715731632,1
|
|
cb,acc,0.5714285714285714,0.06672848092813058,1
|
|
cb,f1,0.3888888888888889,,1
|
|
copa,acc,0.76,0.04292346959909283,0
|
|
hellaswag,acc,0.469627564230233,0.004980566907790459,0
|
|
hellaswag,acc_norm,0.6134236207926708,0.004859699562451462,0
|
|
piqa,acc,0.7578890097932536,0.00999437126910438,0
|
|
piqa,acc_norm,0.7622415669205659,0.009932525779525492,0
|
|
rte,acc,0.5415162454873647,0.029992535385373314,0
|
|
sciq,acc,0.852,0.011234866364235235,0
|
|
sciq,acc_norm,0.764,0.013434451402438678,0
|
|
storycloze_2016,acc,0.7108498129342598,0.010484068799942072,0
|
|
winogrande,acc,0.5737963693764798,0.013898585965412338,0
|
|
|