task,metric,value,err,version anli_r1,acc,0.326,0.014830507204541031,0 anli_r2,acc,0.341,0.014998131348402699,0 anli_r3,acc,0.345,0.01372842153945488,0 arc_challenge,acc,0.18344709897610922,0.011310170179554541,0 arc_challenge,acc_norm,0.22184300341296928,0.012141659068147887,0 arc_easy,acc,0.40446127946127947,0.010070746648278785,0 arc_easy,acc_norm,0.3661616161616162,0.009885391390947728,0 boolq,acc,0.5131498470948013,0.008742030090044975,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.3,,1 copa,acc,0.59,0.04943110704237102,0 hellaswag,acc,0.28271260705038836,0.004493975527386721,0 hellaswag,acc_norm,0.2983469428400717,0.004565974937793719,0 piqa,acc,0.6332970620239391,0.01124362501903826,0 piqa,acc_norm,0.6240478781284005,0.011301098166895725,0 rte,acc,0.5270758122743683,0.030052303463143706,0 sciq,acc,0.678,0.01478291360099666,0 sciq,acc_norm,0.601,0.015493193313162906,0 storycloze_2016,acc,0.5799037947621593,0.011413833983106135,0 winogrande,acc,0.5122336227308603,0.01404827882040562,0