|
task,metric,value,err,version
|
|
anli_r1,acc,0.348,0.01507060460376841,0
|
|
anli_r2,acc,0.342,0.015008706182121738,0
|
|
anli_r3,acc,0.33,0.013579531277800918,0
|
|
arc_challenge,acc,0.28754266211604096,0.01322671905626613,0
|
|
arc_challenge,acc_norm,0.31313993174061433,0.013552671543623504,0
|
|
arc_easy,acc,0.5900673400673401,0.010091953527506246,0
|
|
arc_easy,acc_norm,0.5791245791245792,0.01013050216406634,0
|
|
boolq,acc,0.6201834862385321,0.008488668235778613,1
|
|
cb,acc,0.26785714285714285,0.05971290310957636,1
|
|
cb,f1,0.2511904761904762,,1
|
|
copa,acc,0.81,0.03942772444036623,0
|
|
hellaswag,acc,0.4565823541127266,0.0049709334202319285,0
|
|
hellaswag,acc_norm,0.6061541525592511,0.0048760280379419405,0
|
|
piqa,acc,0.7317736670293797,0.010336761992404485,0
|
|
piqa,acc_norm,0.7448313384113167,0.010171571592521828,0
|
|
rte,acc,0.5379061371841155,0.030009848912529117,0
|
|
sciq,acc,0.918,0.008680515615523746,0
|
|
sciq,acc_norm,0.917,0.00872852720607479,0
|
|
storycloze_2016,acc,0.7354355959380011,0.01020040054171416,0
|
|
winogrande,acc,0.6037884767166535,0.013746404157154946,0
|
|
|