task,metric,value,err,version anli_r1,acc,0.315,0.014696631960792498,0 anli_r2,acc,0.341,0.014998131348402702,0 anli_r3,acc,0.34,0.0136804957257678,0 arc_challenge,acc,0.2636518771331058,0.01287592915129705,0 arc_challenge,acc_norm,0.2858361774744027,0.013203196088537369,0 arc_easy,acc,0.5593434343434344,0.010187264635711983,0 arc_easy,acc_norm,0.5298821548821548,0.010241444322886432,0 boolq,acc,0.6119266055045871,0.008523130584760851,1 cb,acc,0.14285714285714285,0.04718416136255829,1 cb,f1,0.143010752688172,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4473212507468632,0.004962010338226347,0 hellaswag,acc_norm,0.5848436566421031,0.0049174193677660296,0 piqa,acc,0.7089227421109902,0.010598612490942586,0 piqa,acc_norm,0.7143634385201306,0.010539303948661916,0 rte,acc,0.4729241877256318,0.0300523034631437,0 sciq,acc,0.906,0.009233052000787738,0 sciq,acc_norm,0.902,0.009406619184621226,0 storycloze_2016,acc,0.7156600748262961,0.01043161412866526,0 winogrande,acc,0.601420678768745,0.013760357176873838,0