task,metric,value,err,version anli_r1,acc,0.333,0.014910846164229868,0 anli_r2,acc,0.326,0.01483050720454104,0 anli_r3,acc,0.3475,0.013751753243291852,0 arc_challenge,acc,0.2627986348122867,0.012862523175351333,0 arc_challenge,acc_norm,0.30716723549488056,0.013481034054980943,0 arc_easy,acc,0.5913299663299664,0.010087174498762883,0 arc_easy,acc_norm,0.5496632996632996,0.010209047724374145,0 boolq,acc,0.5669724770642202,0.00866625130551806,1 cb,acc,0.5357142857142857,0.06724777654937658,1 cb,f1,0.37227304714989445,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.47191794463254333,0.004981905293878145,0 hellaswag,acc_norm,0.6139215295757817,0.004858539527872466,0 piqa,acc,0.7448313384113167,0.010171571592521822,0 piqa,acc_norm,0.7535364526659413,0.01005481078967181,0 rte,acc,0.5703971119133574,0.029796668829124674,0 sciq,acc,0.836,0.011715000693181331,0 sciq,acc_norm,0.781,0.013084731950262012,0 storycloze_2016,acc,0.7151256012827365,0.01043751398661172,0 winogrande,acc,0.5706393054459353,0.013911537499969163,0