task,metric,value,err,version anli_r1,acc,0.334,0.014922019523732961,0 anli_r2,acc,0.34,0.014987482264363935,0 anli_r3,acc,0.35083333333333333,0.013782212417178197,0 arc_challenge,acc,0.27559726962457337,0.01305716965576184,0 arc_challenge,acc_norm,0.30802047781569963,0.01349142951729204,0 arc_easy,acc,0.5808080808080808,0.010124905282491183,0 arc_easy,acc_norm,0.5711279461279462,0.010155440652900152,0 boolq,acc,0.6116207951070336,0.008524357307908792,1 cb,acc,0.17857142857142858,0.051642771820087224,1 cb,f1,0.18279613107199313,,1 copa,acc,0.82,0.03861229196653697,0 hellaswag,acc,0.454690300736905,0.004969251445596333,0 hellaswag,acc_norm,0.5943039235212109,0.004900227226433378,0 piqa,acc,0.7285092491838956,0.010376251176596137,0 piqa,acc_norm,0.7388465723612623,0.010248738649935587,0 rte,acc,0.4548736462093863,0.029973636495415255,0 sciq,acc,0.915,0.008823426366942317,0 sciq,acc_norm,0.919,0.008632121032139993,0 storycloze_2016,acc,0.7247461250668092,0.010328538400500567,0 winogrande,acc,0.606156274664562,0.013732114472668741,0