task,metric,value,err,version anli_r1,acc,0.338,0.014965960710224496,0 anli_r2,acc,0.345,0.015039986742055238,0 anli_r3,acc,0.3566666666666667,0.013833742805050717,0 arc_challenge,acc,0.29436860068259385,0.013318528460539429,0 arc_challenge,acc_norm,0.3319112627986348,0.01376098820088054,0 arc_easy,acc,0.627104377104377,0.009922743197129257,0 arc_easy,acc_norm,0.609006734006734,0.010012992232540631,0 boolq,acc,0.5923547400611621,0.008594580270731619,1 cb,acc,0.6071428571428571,0.0658538889806635,1 cb,f1,0.5367003367003368,,1 copa,acc,0.8,0.040201512610368445,0 hellaswag,acc,0.4826727743477395,0.004986784319771787,0 hellaswag,acc_norm,0.6368253335988847,0.004799317209902001,0 piqa,acc,0.7589771490750816,0.009979042717267314,0 piqa,acc_norm,0.7742110990206746,0.009754980670917311,0 rte,acc,0.5631768953068592,0.029855247390314945,0 sciq,acc,0.913,0.0089168666307459,0 sciq,acc_norm,0.897,0.009616833339695798,0 storycloze_2016,acc,0.7204703367183325,0.01037770209970486,0 winogrande,acc,0.6037884767166535,0.013746404157154949,0