task,metric,value,err,version anli_r1,acc,0.309,0.014619600977206491,0 anli_r2,acc,0.343,0.015019206922356953,0 anli_r3,acc,0.3333333333333333,0.013613950010225601,0 arc_challenge,acc,0.19112627986348124,0.011490055292778592,0 arc_challenge,acc_norm,0.2167235494880546,0.012040156713481189,0 arc_easy,acc,0.3888888888888889,0.01000324833531376,0 arc_easy,acc_norm,0.36237373737373735,0.009863468202583789,0 boolq,acc,0.4938837920489297,0.00874440068189347,1 cb,acc,0.35714285714285715,0.0646095738380922,1 cb,f1,0.24743230625583568,,1 copa,acc,0.57,0.04975698519562428,0 hellaswag,acc,0.2835092611033659,0.004497803024345142,0 hellaswag,acc_norm,0.2983469428400717,0.004565974937793714,0 piqa,acc,0.6251360174102285,0.011294565805619017,0 piqa,acc_norm,0.6218715995647442,0.011313980666854535,0 rte,acc,0.516245487364621,0.030080573208738064,0 sciq,acc,0.672,0.014853842487270334,0 sciq,acc_norm,0.627,0.015300493622922814,0 storycloze_2016,acc,0.5628006413682523,0.011470867061664471,0 winogrande,acc,0.5011838989739542,0.014052446290529015,0