task,metric,value,err,version anli_r1,acc,0.332,0.014899597242811475,0 anli_r2,acc,0.316,0.014709193056057106,0 anli_r3,acc,0.31666666666666665,0.013434078660827384,0 arc_challenge,acc,0.30887372013651876,0.013501770929344003,0 arc_challenge,acc_norm,0.32849829351535836,0.013724978465537377,0 arc_easy,acc,0.6401515151515151,0.009848484848484846,0 arc_easy,acc_norm,0.6296296296296297,0.009908978578665755,0 boolq,acc,0.6275229357798165,0.008455846866956086,1 cb,acc,0.30357142857142855,0.06199938655510754,1 cb,f1,0.2503507986266607,,1 copa,acc,0.8,0.040201512610368445,0 hellaswag,acc,0.4788886675960964,0.004985331652408345,0 hellaswag,acc_norm,0.6412069308902609,0.004786660691181937,0 piqa,acc,0.750272034820457,0.010099232969867486,0 piqa,acc_norm,0.764961915125136,0.009893146688805312,0 rte,acc,0.5740072202166066,0.02976495674177765,0 sciq,acc,0.921,0.008534156773333445,0 sciq,acc_norm,0.908,0.00914437639315112,0 storycloze_2016,acc,0.7279529663281668,0.010290888060871242,0 winogrande,acc,0.5911602209944752,0.013816954295135684,0