task,metric,value,err,version anli_r1,acc,0.343,0.015019206922356951,0 anli_r2,acc,0.346,0.01505026612756445,0 anli_r3,acc,0.36083333333333334,0.01386918025244486,0 arc_challenge,acc,0.2960750853242321,0.013340916085246271,0 arc_challenge,acc_norm,0.3242320819112628,0.013678810399518819,0 arc_easy,acc,0.6283670033670034,0.00991589712365879,0 arc_easy,acc_norm,0.6153198653198653,0.009983171707008997,0 boolq,acc,0.6119266055045871,0.00852313058476084,1 cb,acc,0.5535714285714286,0.06703189227942395,1 cb,f1,0.4583333333333333,,1 copa,acc,0.8,0.040201512610368445,0 hellaswag,acc,0.48157737502489545,0.0049863932662691625,0 hellaswag,acc_norm,0.6417048396733719,0.00478519504988916,0 piqa,acc,0.7595212187159956,0.009971345364651078,0 piqa,acc_norm,0.7676822633297062,0.009853201384168243,0 rte,acc,0.5379061371841155,0.030009848912529113,0 sciq,acc,0.923,0.008434580140240648,0 sciq,acc_norm,0.912,0.008963053962592074,0 storycloze_2016,acc,0.7338321753073223,0.010220104800551206,0 winogrande,acc,0.6085240726124704,0.01371748707129085,0