|
task,metric,value,err,version
|
|
anli_r1,acc,0.329,0.014865395385928354,0
|
|
anli_r2,acc,0.336,0.014944140233795027,0
|
|
anli_r3,acc,0.3383333333333333,0.013664144006618266,0
|
|
arc_challenge,acc,0.2781569965870307,0.013094469919538805,0
|
|
arc_challenge,acc_norm,0.30887372013651876,0.013501770929344003,0
|
|
arc_easy,acc,0.5993265993265994,0.01005530447425557,0
|
|
arc_easy,acc_norm,0.5576599326599326,0.01019133444422085,0
|
|
boolq,acc,0.5660550458715596,0.008668405003744129,1
|
|
cb,acc,0.48214285714285715,0.06737697508644648,1
|
|
cb,f1,0.3338011695906433,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.4697271459868552,0.004980627287147585,0
|
|
hellaswag,acc_norm,0.6141206930890261,0.004858074013443988,0
|
|
piqa,acc,0.7470076169749728,0.01014288869886246,0
|
|
piqa,acc_norm,0.7519042437431991,0.010077118315574706,0
|
|
rte,acc,0.5523465703971119,0.02993107036293953,0
|
|
sciq,acc,0.835,0.011743632866916145,0
|
|
sciq,acc_norm,0.79,0.01288666233227453,0
|
|
storycloze_2016,acc,0.7156600748262961,0.010431614128665253,0
|
|
winogrande,acc,0.574585635359116,0.013895257666646378,0
|
|
|