task,metric,value,err,version anli_r1,acc,0.328,0.014853842487270334,0 anli_r2,acc,0.337,0.014955087918653605,0 anli_r3,acc,0.32416666666666666,0.013517438120881624,0 arc_challenge,acc,0.20648464163822525,0.011828865619002316,0 arc_challenge,acc_norm,0.2551194539249147,0.012739038695202109,0 arc_easy,acc,0.40614478114478114,0.010077409815364048,0 arc_easy,acc_norm,0.3766835016835017,0.009942848077476172,0 boolq,acc,0.6256880733944954,0.00846424665644323,1 cb,acc,0.26785714285714285,0.05971290310957636,1 cb,f1,0.18656056587091072,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.4547898824935272,0.004969341773423513,0 hellaswag,acc_norm,0.5937064329814777,0.004901368629533419,0 piqa,acc,0.6561479869423286,0.011082356277961393,0 piqa,acc_norm,0.6528835690968444,0.011107104993128086,0 rte,acc,0.5595667870036101,0.029882123363118726,0 sciq,acc,0.775,0.013211720158614756,0 sciq,acc_norm,0.709,0.014370995982377933,0 storycloze_2016,acc,0.694815606627472,0.010648664383985661,0 winogrande,acc,0.5769534333070244,0.01388505535905647,0