|
task,metric,value,err,version
|
|
anli_r1,acc,0.333,0.014910846164229868,0
|
|
anli_r2,acc,0.326,0.01483050720454104,0
|
|
anli_r3,acc,0.3475,0.013751753243291852,0
|
|
arc_challenge,acc,0.2627986348122867,0.012862523175351333,0
|
|
arc_challenge,acc_norm,0.30716723549488056,0.013481034054980943,0
|
|
arc_easy,acc,0.5913299663299664,0.010087174498762883,0
|
|
arc_easy,acc_norm,0.5496632996632996,0.010209047724374145,0
|
|
boolq,acc,0.5669724770642202,0.00866625130551806,1
|
|
cb,acc,0.5357142857142857,0.06724777654937658,1
|
|
cb,f1,0.37227304714989445,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.47191794463254333,0.004981905293878145,0
|
|
hellaswag,acc_norm,0.6139215295757817,0.004858539527872466,0
|
|
piqa,acc,0.7448313384113167,0.010171571592521822,0
|
|
piqa,acc_norm,0.7535364526659413,0.01005481078967181,0
|
|
rte,acc,0.5703971119133574,0.029796668829124674,0
|
|
sciq,acc,0.836,0.011715000693181331,0
|
|
sciq,acc_norm,0.781,0.013084731950262012,0
|
|
storycloze_2016,acc,0.7151256012827365,0.01043751398661172,0
|
|
winogrande,acc,0.5706393054459353,0.013911537499969163,0
|
|
|