task,metric,value,err,version anli_r1,acc,0.321,0.014770821817934635,0 anli_r2,acc,0.336,0.014944140233795021,0 anli_r3,acc,0.3383333333333333,0.013664144006618275,0 arc_challenge,acc,0.18088737201365188,0.011248574467407027,0 arc_challenge,acc_norm,0.22184300341296928,0.01214165906814789,0 arc_easy,acc,0.39225589225589225,0.010018744689650043,0 arc_easy,acc_norm,0.3640572390572391,0.009873293392779117,0 boolq,acc,0.48623853211009177,0.008741742106878654,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.34486817325800373,,1 copa,acc,0.54,0.05009082659620332,0 hellaswag,acc,0.2813184624576778,0.004487235657955673,0 hellaswag,acc_norm,0.2954590718980283,0.004553164013379556,0 piqa,acc,0.6338411316648531,0.011240106070308455,0 piqa,acc_norm,0.6256800870511425,0.01129127680119499,0 rte,acc,0.5054151624548736,0.030094698123239966,0 sciq,acc,0.686,0.014683991951087962,0 sciq,acc_norm,0.637,0.015213890444671281,0 storycloze_2016,acc,0.5665419561731694,0.011459581799087402,0 winogrande,acc,0.4956590370955012,0.014051956064076892,0