task,metric,value,err,version anli_r1,acc,0.337,0.01495508791865361,0 anli_r2,acc,0.334,0.014922019523732967,0 anli_r3,acc,0.3466666666666667,0.013744022550571958,0 arc_challenge,acc,0.19027303754266212,0.0114704241792257,0 arc_challenge,acc_norm,0.2090443686006826,0.011882746987406453,0 arc_easy,acc,0.3952020202020202,0.010031894052790978,0 arc_easy,acc_norm,0.3661616161616162,0.009885391390947726,0 boolq,acc,0.4902140672782875,0.008743379884697191,1 cb,acc,0.48214285714285715,0.06737697508644648,1 cb,f1,0.3363636363636364,,1 copa,acc,0.57,0.04975698519562428,0 hellaswag,acc,0.2824138617805218,0.004492535748097639,0 hellaswag,acc_norm,0.30033857797251545,0.004574683373821047,0 piqa,acc,0.6273122959738846,0.011281318332897741,0 piqa,acc_norm,0.6213275299238302,0.011317163404516852,0 rte,acc,0.48375451263537905,0.030080573208738064,0 sciq,acc,0.69,0.014632638658632902,0 sciq,acc_norm,0.635,0.015231776226264902,0 storycloze_2016,acc,0.5611972207375735,0.011475500529062406,0 winogrande,acc,0.47277032359905286,0.014031631629827701,0