{
  "results": {
    "anli_r1": {
      "acc": 0.334,
      "acc_stderr": 0.014922019523732961
    },
    "anli_r2": {
      "acc": 0.34,
      "acc_stderr": 0.014987482264363935
    },
    "anli_r3": {
      "acc": 0.35083333333333333,
      "acc_stderr": 0.013782212417178197
    },
    "cb": {
      "acc": 0.17857142857142858,
      "acc_stderr": 0.051642771820087224,
      "f1": 0.18279613107199313
    },
    "copa": {
      "acc": 0.82,
      "acc_stderr": 0.03861229196653697
    },
    "hellaswag": {
      "acc": 0.454690300736905,
      "acc_stderr": 0.004969251445596333,
      "acc_norm": 0.5943039235212109,
      "acc_norm_stderr": 0.004900227226433378
    },
    "rte": {
      "acc": 0.4548736462093863,
      "acc_stderr": 0.029973636495415255
    },
    "winogrande": {
      "acc": 0.606156274664562,
      "acc_stderr": 0.013732114472668741
    },
    "storycloze_2016": {
      "acc": 0.7247461250668092,
      "acc_stderr": 0.010328538400500567
    },
    "boolq": {
      "acc": 0.6116207951070336,
      "acc_stderr": 0.008524357307908792
    },
    "arc_easy": {
      "acc": 0.5808080808080808,
      "acc_stderr": 0.010124905282491183,
      "acc_norm": 0.5711279461279462,
      "acc_norm_stderr": 0.010155440652900152
    },
    "arc_challenge": {
      "acc": 0.27559726962457337,
      "acc_stderr": 0.01305716965576184,
      "acc_norm": 0.30802047781569963,
      "acc_norm_stderr": 0.01349142951729204
    },
    "sciq": {
      "acc": 0.915,
      "acc_stderr": 0.008823426366942317,
      "acc_norm": 0.919,
      "acc_norm_stderr": 0.008632121032139993
    },
    "piqa": {
      "acc": 0.7285092491838956,
      "acc_stderr": 0.010376251176596137,
      "acc_norm": 0.7388465723612623,
      "acc_norm_stderr": 0.010248738649935587
    }
  },
  "versions": {
    "anli_r1": 0,
    "anli_r2": 0,
    "anli_r3": 0,
    "cb": 1,
    "copa": 0,
    "hellaswag": 0,
    "rte": 0,
    "winogrande": 0,
    "storycloze_2016": 0,
    "boolq": 1,
    "arc_easy": 0,
    "arc_challenge": 0,
    "sciq": 0,
    "piqa": 0
  }
}