task,metric,value,err,version anli_r1,acc,0.324,0.01480686473373886,0 anli_r2,acc,0.319,0.014746404865473494,0 anli_r3,acc,0.32166666666666666,0.013490095282989521,0 arc_challenge,acc,0.26791808873720135,0.012942030195136433,0 arc_challenge,acc_norm,0.2841296928327645,0.013179442447653886,0 arc_easy,acc,0.5824915824915825,0.01011918737777603,0 arc_easy,acc_norm,0.5488215488215489,0.010210757101073468,0 boolq,acc,0.6024464831804281,0.00855952325693682,1 cb,acc,0.4107142857142857,0.06633634150359541,1 cb,f1,0.3192955192955193,,1 copa,acc,0.73,0.044619604333847394,0 hellaswag,acc,0.44015136427006574,0.004953907062096603,0 hellaswag,acc_norm,0.5684126667994424,0.004942853459371548,0 piqa,acc,0.7306855277475517,0.010350004070588758,0 piqa,acc_norm,0.7437431991294886,0.010185787831565058,0 rte,acc,0.5126353790613718,0.030086851767188564,0 sciq,acc,0.857,0.011075814808567038,0 sciq,acc_norm,0.832,0.01182860583145426,0 storycloze_2016,acc,0.689470871191876,0.010700112173178448,0 winogrande,acc,0.5548539857932123,0.01396766295435549,0