task,metric,value,err,version anli_r1,acc,0.303,0.014539683710535264,0 anli_r2,acc,0.312,0.01465847437050901,0 anli_r3,acc,0.3491666666666667,0.013767075395077247,0 arc_challenge,acc,0.23122866894197952,0.012320858834772273,0 arc_challenge,acc_norm,0.2619453924914676,0.012849054826858115,0 arc_easy,acc,0.5231481481481481,0.010248782484554473,0 arc_easy,acc_norm,0.4819023569023569,0.010253060653479177,0 boolq,acc,0.6134556574923548,0.008516943934341973,1 cb,acc,0.30357142857142855,0.06199938655510753,1 cb,f1,0.2927120669056153,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4506074487153953,0.0049653753416431376,0 hellaswag,acc_norm,0.5834495120493925,0.004919794704673269,0 piqa,acc,0.6887921653971708,0.010802263878045844,0 piqa,acc_norm,0.6866158868335147,0.010822829929195489,0 rte,acc,0.5342960288808665,0.030025579819366422,0 sciq,acc,0.88,0.010281328012747391,0 sciq,acc_norm,0.863,0.010878848714333327,0 storycloze_2016,acc,0.6996258685195083,0.010600915927985033,0 winogrande,acc,0.6077348066298343,0.013722400462000883,0