task,metric,value,err,version anli_r1,acc,0.326,0.014830507204541042,0 anli_r2,acc,0.368,0.015258073561521802,0 anli_r3,acc,0.33,0.013579531277800922,0 arc_challenge,acc,0.2645051194539249,0.012889272949313368,0 arc_challenge,acc_norm,0.2901023890784983,0.013261573677520767,0 arc_easy,acc,0.5812289562289562,0.010123487160167812,0 arc_easy,acc_norm,0.5614478114478114,0.010182010275471115,0 boolq,acc,0.6079510703363914,0.008538802914911997,1 cb,acc,0.44642857142857145,0.06703189227942398,1 cb,f1,0.3407114624505929,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.44124676359290976,0.004955212787832385,0 hellaswag,acc_norm,0.572495518820952,0.004937054233711573,0 piqa,acc,0.7219804134929271,0.010453117358332811,0 piqa,acc_norm,0.7323177366702938,0.010330111189370423,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.868,0.010709373963528033,0 sciq,acc_norm,0.848,0.01135891830347529,0 storycloze_2016,acc,0.6916087653661144,0.010679734445487796,0 winogrande,acc,0.5445935280189423,0.013996485037729794,0