Spaces:
Runtime error
Runtime error
[ | |
{ | |
"dataset": "cola", | |
"type": "task", | |
"origin_acc": 62.2, | |
"origin_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable'.", | |
"atk_acc": 17.4, | |
"atk_prompt": "Assess the grammatical structure of the given sentence and classify it as 'Acceptable' or 'Unacceptable' Q47Jmd4lMV. ", | |
"transfer_ori_acc": 86.2, | |
"transfer_atk_acc": 86.3 | |
}, | |
{ | |
"dataset": "cola", | |
"type": "role", | |
"origin_acc": 35.2, | |
"origin_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones.", | |
"atk_acc": 2.4, | |
"atk_prompt": "Working as a grammar classifier, identify whether the given sentence has correct grammar, and respond with 'acceptable' for correct sentences or 'unacceptable' for incorrect ones yh912BU8T0. ", | |
"transfer_ori_acc": 85.9, | |
"transfer_atk_acc": 86.3 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "task", | |
"origin_acc": 69.36, | |
"origin_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 62.01, | |
"atk_prompt": "Determine if the meanings of the following sentences are semantically equivalent by responding with 'equivalent' or 'not_equivalent' SFPCqpiClT. ", | |
"transfer_ori_acc": 84.55882352941177, | |
"transfer_atk_acc": 84.31372549019608 | |
}, | |
{ | |
"dataset": "mrpc", | |
"type": "role", | |
"origin_acc": 68.14, | |
"origin_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 58.09, | |
"atk_prompt": "In the role of a sentence comparison analyst, assess the provided sentences and indicate if they convey the same meaning by selecting 'equivalent' or 'not_equivalent' xikCjkMydH. ", | |
"transfer_ori_acc": 86.27450980392157, | |
"transfer_atk_acc": 86.27450980392157 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "task", | |
"origin_acc": 41.2, | |
"origin_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment'.", | |
"atk_acc": 1.1, | |
"atk_prompt": "Based on the information in the context, decide if the answer to the question is justified by choosing 'entailment' or 'not_entailment' zq0DcZ5dnI. ", | |
"transfer_ori_acc": 93.8, | |
"transfer_atk_acc": 93.7 | |
}, | |
{ | |
"dataset": "qnli", | |
"type": "role", | |
"origin_acc": 41.4, | |
"origin_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment'.", | |
"atk_acc": 1.0, | |
"atk_prompt": "As a semantic researcher, evaluate whether the provided context supports the answer to the question and choose 'entailment' or 'not_entailment' d6ZQ3u0GBQ. ", | |
"transfer_ori_acc": 94.0, | |
"transfer_atk_acc": 93.89999999999999 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "task", | |
"origin_acc": 38.2, | |
"origin_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent'.", | |
"atk_acc": 23.5, | |
"atk_prompt": "Assess whether the following statements are identical in meaning by answering 'equivalent' or 'not_equivalent' 6a4Yn3RGVc. ", | |
"transfer_ori_acc": 88.0, | |
"transfer_atk_acc": 87.8 | |
}, | |
{ | |
"dataset": "qqp", | |
"type": "role", | |
"origin_acc": 32.9, | |
"origin_prompt": "Working as a question classifier, identify whether the given questions share the same meaning, and respond with 'equivalent' for equivalent questions or 'not_equivalent' for different ones.", | |
"atk_acc": 25.0, | |
"atk_prompt": "Working as a question classifier, identify whether the given questions share the same meaning, and respond with 'equivalent' for equivalent questions or 'not_equivalent' for different ones AMsRIKZniY. ", | |
"transfer_ori_acc": 88.6, | |
"transfer_atk_acc": 88.5 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "task", | |
"origin_acc": 43.68, | |
"origin_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment'.", | |
"atk_acc": 7.22, | |
"atk_prompt": "Review the two statements and categorize their relationship as either 'entailment' or 'not_entailment' mzjVogkRhM. ", | |
"transfer_ori_acc": 92.7797833935018, | |
"transfer_atk_acc": 92.7797833935018 | |
}, | |
{ | |
"dataset": "rte", | |
"type": "role", | |
"origin_acc": 38.63, | |
"origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", | |
"atk_acc": 0.72, | |
"atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' Q47Jmd4lMV. ", | |
"transfer_ori_acc": 92.7797833935018, | |
"transfer_atk_acc": 92.7797833935018 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "task", | |
"origin_acc": 14.45, | |
"origin_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment.", | |
"atk_acc": 0.11, | |
"atk_prompt": "Review this statement and decide whether it has a 'positive' or 'negative' sentiment w52rwgo0Av. ", | |
"transfer_ori_acc": 96.78899082568807, | |
"transfer_atk_acc": 96.67431192660551 | |
}, | |
{ | |
"dataset": "sst2", | |
"type": "role", | |
"origin_acc": 26.38, | |
"origin_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement.", | |
"atk_acc": 2.75, | |
"atk_prompt": "In the role of a sentiment analysis tool, respond with 'positive' or 'negative' to classify this statement Q47Jmd4lMV. ", | |
"transfer_ori_acc": 96.67431192660551, | |
"transfer_atk_acc": 96.78899082568807 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "task", | |
"origin_acc": 28.17, | |
"origin_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment'.", | |
"atk_acc": 8.45, | |
"atk_prompt": "Please classify the relationship between the provided sentences as 'entailment' or 'not_entailment' rzu98kF2KF. ", | |
"transfer_ori_acc": 77.46478873239437, | |
"transfer_atk_acc": 77.46478873239437 | |
}, | |
{ | |
"dataset": "wnli", | |
"type": "role", | |
"origin_acc": 16.9, | |
"origin_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment'.", | |
"atk_acc": 2.82, | |
"atk_prompt": "As an entailment identification system, examine the connection between the following sentences and respond with 'entailment' or 'not_entailment' rzu98kF2KF. ", | |
"transfer_ori_acc": 76.05633802816901, | |
"transfer_atk_acc": 76.05633802816901 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "task", | |
"origin_acc": 10.15, | |
"origin_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 5.02, | |
"atk_prompt": "In relation to the multiple-choice question on {}, please provide the accurate answer by choosing 'A', 'B', 'C', or 'D' OB4KVJzIft. ", | |
"transfer_ori_acc": 53.54609929078015, | |
"transfer_atk_acc": 53.90070921985816 | |
}, | |
{ | |
"dataset": "mmlu", | |
"type": "role", | |
"origin_acc": 9.45, | |
"origin_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D'.", | |
"atk_acc": 4.43, | |
"atk_prompt": "As someone well-versed in {}, please address the multiple-choice question below by selecting 'A', 'B', 'C', or 'D' w52rwgo0Av.", | |
"transfer_ori_acc": 53.01418439716312, | |
"transfer_atk_acc": 52.659574468085104 | |
} | |
] |