task,metric,value,err,version anli_r1,acc,0.334,0.01492201952373296,0 anli_r2,acc,0.334,0.014922019523732965,0 anli_r3,acc,0.3566666666666667,0.013833742805050715,0 arc_challenge,acc,0.26109215017064846,0.012835523909473848,0 arc_challenge,acc_norm,0.26706484641638223,0.012928933196496344,0 arc_easy,acc,0.5387205387205387,0.010228972678389611,0 arc_easy,acc_norm,0.48947811447811446,0.010257511546488232,0 boolq,acc,0.5758409785932722,0.00864386902338812,1 cb,acc,0.2857142857142857,0.06091449038731725,1 cb,f1,0.2883181980926342,,1 copa,acc,0.71,0.04560480215720684,0 hellaswag,acc,0.43905596494722166,0.0049525768633152155,0 hellaswag,acc_norm,0.5635331607249552,0.004949335356881862,0 piqa,acc,0.7154515778019587,0.010527218464130614,0 piqa,acc_norm,0.735038084874864,0.010296557993316038,0 rte,acc,0.5379061371841155,0.030009848912529117,0 sciq,acc,0.777,0.013169830843425677,0 sciq,acc_norm,0.686,0.01468399195108796,0 storycloze_2016,acc,0.703901656867985,0.010557307688475126,0 winogrande,acc,0.5445935280189423,0.013996485037729793,0