task,metric,value,err,version anli_r1,acc,0.332,0.014899597242811485,0 anli_r2,acc,0.337,0.0149550879186536,0 anli_r3,acc,0.355,0.013819249004047296,0 arc_challenge,acc,0.28668941979522183,0.013214986329274757,0 arc_challenge,acc_norm,0.30631399317406144,0.013470584417276513,0 arc_easy,acc,0.617003367003367,0.009974920384536469,0 arc_easy,acc_norm,0.5462962962962963,0.010215708295494117,0 boolq,acc,0.5669724770642202,0.008666251305518059,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.4347442680776014,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.4841665006970723,0.004987278910505115,0 hellaswag,acc_norm,0.6352320254929297,0.004803812631994966,0 piqa,acc,0.7578890097932536,0.00999437126910438,0 piqa,acc_norm,0.7676822633297062,0.009853201384168243,0 rte,acc,0.5306859205776173,0.03003973059219781,0 sciq,acc,0.845,0.011450157470799475,0 sciq,acc_norm,0.757,0.013569640199177458,0 storycloze_2016,acc,0.7151256012827365,0.010437513986611718,0 winogrande,acc,0.5990528808208366,0.013773974554948033,0