|
task,metric,value,err,version
|
|
anli_r1,acc,0.337,0.01495508791865361,0
|
|
anli_r2,acc,0.334,0.014922019523732967,0
|
|
anli_r3,acc,0.3466666666666667,0.013744022550571958,0
|
|
arc_challenge,acc,0.19027303754266212,0.0114704241792257,0
|
|
arc_challenge,acc_norm,0.2090443686006826,0.011882746987406453,0
|
|
arc_easy,acc,0.3952020202020202,0.010031894052790978,0
|
|
arc_easy,acc_norm,0.3661616161616162,0.009885391390947726,0
|
|
boolq,acc,0.4902140672782875,0.008743379884697191,1
|
|
cb,acc,0.48214285714285715,0.06737697508644648,1
|
|
cb,f1,0.3363636363636364,,1
|
|
copa,acc,0.57,0.04975698519562428,0
|
|
hellaswag,acc,0.2824138617805218,0.004492535748097639,0
|
|
hellaswag,acc_norm,0.30033857797251545,0.004574683373821047,0
|
|
piqa,acc,0.6273122959738846,0.011281318332897741,0
|
|
piqa,acc_norm,0.6213275299238302,0.011317163404516852,0
|
|
rte,acc,0.48375451263537905,0.030080573208738064,0
|
|
sciq,acc,0.69,0.014632638658632902,0
|
|
sciq,acc_norm,0.635,0.015231776226264902,0
|
|
storycloze_2016,acc,0.5611972207375735,0.011475500529062406,0
|
|
winogrande,acc,0.47277032359905286,0.014031631629827701,0
|
|
|