task,metric,value,err,version anli_r1,acc,0.334,0.014922019523732963,0 anli_r2,acc,0.315,0.014696631960792503,0 anli_r3,acc,0.33416666666666667,0.01362243481313678,0 arc_challenge,acc,0.18600682593856654,0.011370940183266728,0 arc_challenge,acc_norm,0.2167235494880546,0.012040156713481192,0 arc_easy,acc,0.39057239057239057,0.010011059112064236,0 arc_easy,acc_norm,0.3547979797979798,0.009817629113069697,0 boolq,acc,0.4957186544342508,0.008744734378208071,1 cb,acc,0.5535714285714286,0.06703189227942398,1 cb,f1,0.38156331670230453,,1 copa,acc,0.55,0.04999999999999999,0 hellaswag,acc,0.2811192989444334,0.004486268470666331,0 hellaswag,acc_norm,0.29874526986656047,0.0045677248720572,0 piqa,acc,0.6338411316648531,0.011240106070308453,0 piqa,acc_norm,0.6284004352557128,0.011274603006724747,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.691,0.014619600977206488,0 sciq,acc_norm,0.634,0.015240612726405756,0 storycloze_2016,acc,0.5617316942811331,0.011473969561488145,0 winogrande,acc,0.489344909234412,0.014049294536290396,0