task,metric,value,err,version anli_r1,acc,0.328,0.014853842487270336,0 anli_r2,acc,0.38,0.015356947477797577,0 anli_r3,acc,0.3575,0.013840921245257796,0 arc_challenge,acc,0.2986348122866894,0.013374078615068756,0 arc_challenge,acc_norm,0.3216723549488055,0.013650488084494162,0 arc_easy,acc,0.6456228956228957,0.009815004030251743,0 arc_easy,acc_norm,0.6435185185185185,0.009828046544504438,0 boolq,acc,0.5596330275229358,0.008682635667686902,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.3799029799029799,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.4539932284405497,0.00496861353930925,0 hellaswag,acc_norm,0.6053574985062736,0.004877748536428436,0 piqa,acc,0.7464635473340587,0.010150090834551782,0 piqa,acc_norm,0.7529923830250272,0.010062268140772644,0 rte,acc,0.5018050541516246,0.030096267148976626,0 sciq,acc,0.921,0.008534156773333431,0 sciq,acc_norm,0.938,0.007629823996280307,0 storycloze_2016,acc,0.7204703367183325,0.010377702099704856,0 winogrande,acc,0.5832675611681136,0.013856250072796318,0