{
"results": {
"anli_r1": {
"acc": 0.332,
"acc_stderr": 0.014899597242811475
},
"anli_r2": {
"acc": 0.316,
"acc_stderr": 0.014709193056057106
},
"anli_r3": {
"acc": 0.31666666666666665,
"acc_stderr": 0.013434078660827384
},
"cb": {
"acc": 0.30357142857142855,
"acc_stderr": 0.06199938655510754,
"f1": 0.2503507986266607
},
"copa": {
"acc": 0.8,
"acc_stderr": 0.040201512610368445
},
"hellaswag": {
"acc": 0.4788886675960964,
"acc_stderr": 0.004985331652408345,
"acc_norm": 0.6412069308902609,
"acc_norm_stderr": 0.004786660691181937
},
"rte": {
"acc": 0.5740072202166066,
"acc_stderr": 0.02976495674177765
},
"winogrande": {
"acc": 0.5911602209944752,
"acc_stderr": 0.013816954295135684
},
"storycloze_2016": {
"acc": 0.7279529663281668,
"acc_stderr": 0.010290888060871242
},
"boolq": {
"acc": 0.6275229357798165,
"acc_stderr": 0.008455846866956086
},
"arc_easy": {
"acc": 0.6401515151515151,
"acc_stderr": 0.009848484848484846,
"acc_norm": 0.6296296296296297,
"acc_norm_stderr": 0.009908978578665755
},
"arc_challenge": {
"acc": 0.30887372013651876,
"acc_stderr": 0.013501770929344003,
"acc_norm": 0.32849829351535836,
"acc_norm_stderr": 0.013724978465537377
},
"sciq": {
"acc": 0.921,
"acc_stderr": 0.008534156773333445,
"acc_norm": 0.908,
"acc_norm_stderr": 0.00914437639315112
},
"piqa": {
"acc": 0.750272034820457,
"acc_stderr": 0.010099232969867486,
"acc_norm": 0.764961915125136,
"acc_norm_stderr": 0.009893146688805312
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}