task,metric,value,err,version anli_r1,acc,0.351,0.015100563798316405,0 anli_r2,acc,0.345,0.015039986742055237,0 anli_r3,acc,0.345,0.013728421539454878,0 arc_challenge,acc,0.30716723549488056,0.013481034054980945,0 arc_challenge,acc_norm,0.32337883959044367,0.013669421630012122,0 arc_easy,acc,0.6300505050505051,0.009906656266021155,0 arc_easy,acc_norm,0.6111111111111112,0.01000324833531377,0 boolq,acc,0.6146788990825688,0.008511930879680652,1 cb,acc,0.5714285714285714,0.06672848092813058,1 cb,f1,0.37671957671957673,,1 copa,acc,0.78,0.04163331998932261,0 hellaswag,acc,0.4827723561043617,0.004986818680313444,0 hellaswag,acc_norm,0.6446922923720374,0.004776283203468094,0 piqa,acc,0.7529923830250272,0.010062268140772625,0 piqa,acc_norm,0.7671381936887922,0.009861236071080753,0 rte,acc,0.5776173285198556,0.02973162264649588,0 sciq,acc,0.919,0.008632121032139978,0 sciq,acc_norm,0.907,0.009188875634996669,0 storycloze_2016,acc,0.7252805986103688,0.010322309878339507,0 winogrande,acc,0.595895816890292,0.013791610664670845,0