task,metric,value,err,version anli_r1,acc,0.345,0.015039986742055235,0 anli_r2,acc,0.328,0.014853842487270334,0 anli_r3,acc,0.36083333333333334,0.013869180252444862,0 arc_challenge,acc,0.2738907849829352,0.013032004972989505,0 arc_challenge,acc_norm,0.2935153583617747,0.013307250444941113,0 arc_easy,acc,0.5963804713804713,0.01006736896034822,0 arc_easy,acc_norm,0.5723905723905723,0.010151683397430673,0 boolq,acc,0.6061162079510704,0.008545835792614984,1 cb,acc,0.44642857142857145,0.06703189227942398,1 cb,f1,0.3346801346801347,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.44074885480979886,0.004954622308739005,0 hellaswag,acc_norm,0.5762796255725952,0.004931372657129803,0 piqa,acc,0.7285092491838956,0.010376251176596135,0 piqa,acc_norm,0.7377584330794341,0.010262502565172442,0 rte,acc,0.48375451263537905,0.030080573208738064,0 sciq,acc,0.879,0.01031821038094609,0 sciq,acc_norm,0.87,0.010640169792499361,0 storycloze_2016,acc,0.6942811330839124,0.010653884866190597,0 winogrande,acc,0.5524861878453039,0.013974847640536204,0