task,metric,value,err,version anli_r1,acc,0.333,0.014910846164229864,0 anli_r2,acc,0.334,0.014922019523732967,0 anli_r3,acc,0.35,0.013774667009018552,0 arc_challenge,acc,0.27474402730375425,0.013044617212771227,0 arc_challenge,acc_norm,0.2883959044368601,0.013238394422428164,0 arc_easy,acc,0.5816498316498316,0.010122061470742853,0 arc_easy,acc_norm,0.571969696969697,0.010152943316426265,0 boolq,acc,0.5990825688073395,0.008571628711617004,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.30857142857142855,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.43925512846046605,0.004952820538831887,0 hellaswag,acc_norm,0.5730930093606851,0.00493617678463196,0 piqa,acc,0.735038084874864,0.010296557993316049,0 piqa,acc_norm,0.733949945593036,0.010310039263352824,0 rte,acc,0.4657039711191336,0.030025579819366422,0 sciq,acc,0.869,0.010674874844837956,0 sciq,acc_norm,0.85,0.011297239823409291,0 storycloze_2016,acc,0.6937466595403528,0.010659088460112756,0 winogrande,acc,0.5509076558800315,0.013979459389140834,0