|
task,metric,value,err,version
|
|
anli_r1,acc,0.355,0.01513949154378053,0
|
|
anli_r2,acc,0.359,0.015177264224798601,0
|
|
anli_r3,acc,0.3308333333333333,0.013588208070708992,0
|
|
arc_challenge,acc,0.2551194539249147,0.012739038695202098,0
|
|
arc_challenge,acc_norm,0.2858361774744027,0.013203196088537369,0
|
|
arc_easy,acc,0.5648148148148148,0.010173216430370927,0
|
|
arc_easy,acc_norm,0.5340909090909091,0.010235908103438688,0
|
|
boolq,acc,0.5856269113149847,0.00861586377642113,1
|
|
cb,acc,0.48214285714285715,0.0673769750864465,1
|
|
cb,f1,0.3403298350824588,,1
|
|
copa,acc,0.7,0.046056618647183814,0
|
|
hellaswag,acc,0.4303923521210914,0.004941191607317909,0
|
|
hellaswag,acc_norm,0.5592511451902011,0.004954622308739016,0
|
|
piqa,acc,0.7328618063112078,0.010323440492612426,0
|
|
piqa,acc_norm,0.73449401523395,0.010303308653024432,0
|
|
rte,acc,0.5451263537906137,0.029973636495415252,0
|
|
sciq,acc,0.862,0.010912152632504411,0
|
|
sciq,acc_norm,0.796,0.012749374359024391,0
|
|
storycloze_2016,acc,0.689470871191876,0.010700112173178448,0
|
|
winogrande,acc,0.5422257300710339,0.014002284504422442,0
|
|
|