task,metric,value,err,version anli_r1,acc,0.338,0.014965960710224489,0 anli_r2,acc,0.332,0.014899597242811476,0 anli_r3,acc,0.3325,0.013605417345710526,0 arc_challenge,acc,0.27986348122866894,0.01311904089772592,0 arc_challenge,acc_norm,0.29180887372013653,0.013284525292403506,0 arc_easy,acc,0.5765993265993266,0.010138671005289047,0 arc_easy,acc_norm,0.5585016835016835,0.010189314382749929,0 boolq,acc,0.6079510703363914,0.008538802914911992,1 cb,acc,0.08928571428571429,0.038450387280282494,1 cb,f1,0.0871517027863777,,1 copa,acc,0.83,0.03775251680686371,0 hellaswag,acc,0.4525990838478391,0.0049673082544257514,0 hellaswag,acc_norm,0.5948018323043218,0.004899270310557971,0 piqa,acc,0.7257889009793254,0.010408618664933382,0 piqa,acc_norm,0.7334058759521219,0.010316749863541365,0 rte,acc,0.49458483754512633,0.03009469812323996,0 sciq,acc,0.908,0.009144376393151086,0 sciq,acc_norm,0.906,0.009233052000787738,0 storycloze_2016,acc,0.7135221806520577,0.01045510591863303,0 winogrande,acc,0.585635359116022,0.01384484623226856,0