|
task,metric,value,err,version
|
|
anli_r1,acc,0.315,0.014696631960792498,0
|
|
anli_r2,acc,0.341,0.014998131348402702,0
|
|
anli_r3,acc,0.34,0.0136804957257678,0
|
|
arc_challenge,acc,0.2636518771331058,0.01287592915129705,0
|
|
arc_challenge,acc_norm,0.2858361774744027,0.013203196088537369,0
|
|
arc_easy,acc,0.5593434343434344,0.010187264635711983,0
|
|
arc_easy,acc_norm,0.5298821548821548,0.010241444322886432,0
|
|
boolq,acc,0.6119266055045871,0.008523130584760851,1
|
|
cb,acc,0.14285714285714285,0.04718416136255829,1
|
|
cb,f1,0.143010752688172,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.4473212507468632,0.004962010338226347,0
|
|
hellaswag,acc_norm,0.5848436566421031,0.0049174193677660296,0
|
|
piqa,acc,0.7089227421109902,0.010598612490942586,0
|
|
piqa,acc_norm,0.7143634385201306,0.010539303948661916,0
|
|
rte,acc,0.4729241877256318,0.0300523034631437,0
|
|
sciq,acc,0.906,0.009233052000787738,0
|
|
sciq,acc_norm,0.902,0.009406619184621226,0
|
|
storycloze_2016,acc,0.7156600748262961,0.01043161412866526,0
|
|
winogrande,acc,0.601420678768745,0.013760357176873838,0
|
|
|