task,metric,value,err,version anli_r1,acc,0.329,0.014865395385928354,0 anli_r2,acc,0.336,0.014944140233795027,0 anli_r3,acc,0.3383333333333333,0.013664144006618266,0 arc_challenge,acc,0.2781569965870307,0.013094469919538805,0 arc_challenge,acc_norm,0.30887372013651876,0.013501770929344003,0 arc_easy,acc,0.5993265993265994,0.01005530447425557,0 arc_easy,acc_norm,0.5576599326599326,0.01019133444422085,0 boolq,acc,0.5660550458715596,0.008668405003744129,1 cb,acc,0.48214285714285715,0.06737697508644648,1 cb,f1,0.3338011695906433,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4697271459868552,0.004980627287147585,0 hellaswag,acc_norm,0.6141206930890261,0.004858074013443988,0 piqa,acc,0.7470076169749728,0.01014288869886246,0 piqa,acc_norm,0.7519042437431991,0.010077118315574706,0 rte,acc,0.5523465703971119,0.02993107036293953,0 sciq,acc,0.835,0.011743632866916145,0 sciq,acc_norm,0.79,0.01288666233227453,0 storycloze_2016,acc,0.7156600748262961,0.010431614128665253,0 winogrande,acc,0.574585635359116,0.013895257666646378,0