task,metric,value,err,version anli_r1,acc,0.344,0.015029633724408947,0 anli_r2,acc,0.321,0.01477082181793464,0 anli_r3,acc,0.34833333333333333,0.01375943749887408,0 arc_challenge,acc,0.27986348122866894,0.013119040897725922,0 arc_challenge,acc_norm,0.31143344709897613,0.013532472099850942,0 arc_easy,acc,0.5984848484848485,0.010058790020755567,0 arc_easy,acc_norm,0.5395622895622896,0.01022761638628902,0 boolq,acc,0.5700305810397553,0.008658853690729254,1 cb,acc,0.35714285714285715,0.06460957383809221,1 cb,f1,0.1754385964912281,,1 copa,acc,0.8,0.040201512610368445,0 hellaswag,acc,0.4792869946225851,0.004985498055190357,0 hellaswag,acc_norm,0.6265684126667994,0.004827266662144035,0 piqa,acc,0.7584330794341676,0.009986718001804467,0 piqa,acc_norm,0.7633297062023939,0.009916841655042809,0 rte,acc,0.5342960288808665,0.030025579819366422,0 sciq,acc,0.848,0.011358918303475274,0 sciq,acc_norm,0.769,0.013334797216936438,0 storycloze_2016,acc,0.7231427044361304,0.01034711289027692,0 winogrande,acc,0.5753749013417522,0.013891893150264213,0