|
task,metric,value,err,version
|
|
anli_r1,acc,0.338,0.014965960710224496,0
|
|
anli_r2,acc,0.345,0.015039986742055238,0
|
|
anli_r3,acc,0.3566666666666667,0.013833742805050717,0
|
|
arc_challenge,acc,0.29436860068259385,0.013318528460539429,0
|
|
arc_challenge,acc_norm,0.3319112627986348,0.01376098820088054,0
|
|
arc_easy,acc,0.627104377104377,0.009922743197129257,0
|
|
arc_easy,acc_norm,0.609006734006734,0.010012992232540631,0
|
|
boolq,acc,0.5923547400611621,0.008594580270731619,1
|
|
cb,acc,0.6071428571428571,0.0658538889806635,1
|
|
cb,f1,0.5367003367003368,,1
|
|
copa,acc,0.8,0.040201512610368445,0
|
|
hellaswag,acc,0.4826727743477395,0.004986784319771787,0
|
|
hellaswag,acc_norm,0.6368253335988847,0.004799317209902001,0
|
|
piqa,acc,0.7589771490750816,0.009979042717267314,0
|
|
piqa,acc_norm,0.7742110990206746,0.009754980670917311,0
|
|
rte,acc,0.5631768953068592,0.029855247390314945,0
|
|
sciq,acc,0.913,0.0089168666307459,0
|
|
sciq,acc_norm,0.897,0.009616833339695798,0
|
|
storycloze_2016,acc,0.7204703367183325,0.01037770209970486,0
|
|
winogrande,acc,0.6037884767166535,0.013746404157154949,0
|
|
|