task,metric,value,err,version anli_r1,acc,0.333,0.014910846164229873,0 anli_r2,acc,0.351,0.015100563798316403,0 anli_r3,acc,0.335,0.01363087184382148,0 arc_challenge,acc,0.18771331058020477,0.01141100131415512,0 arc_challenge,acc_norm,0.2158703071672355,0.01202297536003066,0 arc_easy,acc,0.3918350168350168,0.010016835016834962,0 arc_easy,acc_norm,0.3653198653198653,0.009880576614806928,0 boolq,acc,0.4938837920489297,0.00874440068189348,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.27474323062558353,,1 copa,acc,0.59,0.04943110704237102,0 hellaswag,acc,0.28281218880701053,0.004494454911844635,0 hellaswag,acc_norm,0.2951603266281617,0.004551826272978058,0 piqa,acc,0.6381936887921654,0.011211397313020366,0 piqa,acc_norm,0.6207834602829162,0.011320331012905074,0 rte,acc,0.48375451263537905,0.030080573208738064,0 sciq,acc,0.692,0.014606483127342761,0 sciq,acc_norm,0.64,0.01518652793204012,0 storycloze_2016,acc,0.564404061998931,0.011466111817562836,0 winogrande,acc,0.49013417521704816,0.014049749833367592,0