task,metric,value,err,version anli_r1,acc,0.32,0.014758652303574869,0 anli_r2,acc,0.341,0.014998131348402706,0 anli_r3,acc,0.32166666666666666,0.013490095282989521,0 arc_challenge,acc,0.30887372013651876,0.013501770929344003,0 arc_challenge,acc_norm,0.3395904436860068,0.013839039762820164,0 arc_easy,acc,0.6388888888888888,0.00985601342581124,0 arc_easy,acc_norm,0.6506734006734006,0.009782853449399295,0 boolq,acc,0.5626911314984709,0.008676043429497423,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.36734693877551017,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.4540928101971719,0.004968705270086761,0 hellaswag,acc_norm,0.6073491336387173,0.004873421833291568,0 piqa,acc,0.7464635473340587,0.010150090834551786,0 piqa,acc_norm,0.7524483133841132,0.010069703966857116,0 rte,acc,0.5306859205776173,0.03003973059219781,0 sciq,acc,0.921,0.008534156773333438,0 sciq,acc_norm,0.933,0.007910345983177547,0 storycloze_2016,acc,0.7172634954569749,0.01041380648612127,0 winogrande,acc,0.5864246250986582,0.013840971763195308,0