zyznull committed on
Commit
2f9d4ff
·
verified ·
1 Parent(s): a3b5d14

Update scripts/eval_mteb.py

Browse files
Files changed (1) hide show
  1. scripts/eval_mteb.py +63 -2
scripts/eval_mteb.py CHANGED
@@ -119,7 +119,19 @@ CMTEB_TASK_LIST = ['TNews', 'IFlyTek', 'MultilingualSentiment', 'JDReview', 'Onl
119
  'T2Retrieval', 'MMarcoRetrieval', 'DuRetrieval', 'CovidRetrieval', 'CmedqaRetrieval', 'EcomRetrieval', 'MedicalRetrieval', 'VideoRetrieval',
120
  'ATEC', 'BQ', 'LCQMC', 'PAWSX', 'STSB', 'AFQMC', 'QBQTC', 'STS22']
121
 
 
 
 
 
 
122
 
 
 
 
 
 
 
 
123
 
124
  logging.basicConfig(
125
  level=logging.INFO,
@@ -136,7 +148,6 @@ def get_detailed_instruct(task_description: str) -> str:
136
 
137
  def get_task_def_by_task_name_and_type(task_name: str, task_type: str, default_instruct='Given a web search query, retrieve relevant passages that answer the query') -> str:
138
  if task_type in ['STS']:
139
- # return "Given a premise, retrieve a hypothesis that is entailed by the premise."
140
  return "Retrieve semantically similar text"
141
 
142
  if task_type in ['Summarization']:
@@ -166,6 +177,13 @@ def get_task_def_by_task_name_and_type(task_name: str, task_type: str, default_i
166
  'JDReview': 'Classify the customer review for iPhone on e-commerce platform into positive or negative',
167
  'OnlineShopping': 'Classify the customer review for online shopping into positive or negative',
168
  'Waimai': 'Classify the customer review from a food takeaway platform into positive or negative',
 
 
 
 
 
 
 
169
  }
170
  return task_name_to_instruct[task_name]
171
 
@@ -187,6 +205,16 @@ def get_task_def_by_task_name_and_type(task_name: str, task_type: str, default_i
187
  'CLSClusteringP2P': 'Identify the main category of scholar papers based on the titles and abstracts',
188
  'ThuNewsClusteringS2S': 'Identify the topic or theme of the given news articles based on the titles',
189
  'ThuNewsClusteringP2P': 'Identify the topic or theme of the given news articles based on the titles and contents',
 
 
 
 
 
 
 
 
 
 
190
  }
191
  return task_name_to_instruct[task_name]
192
 
@@ -206,6 +234,16 @@ def get_task_def_by_task_name_and_type(task_name: str, task_type: str, default_i
206
  'CMedQAv2': 'Given a Chinese community medical question, retrieve replies that best answer the question',
207
  'Ocnli': 'Retrieve semantically similar text.',
208
  'Cmnli': 'Retrieve semantically similar text.',
 
 
 
 
 
 
 
 
 
 
209
  }
210
  return task_name_to_instruct[task_name]
211
 
@@ -237,6 +275,24 @@ def get_task_def_by_task_name_and_type(task_name: str, task_type: str, default_i
237
  'EcomRetrieval': 'Given a user query from an e-commerce website, retrieve description sentences of relevant products',
238
  'MedicalRetrieval': 'Given a medical question, retrieve user replies that best answer the question',
239
  'VideoRetrieval': 'Given a video search query, retrieve the titles of relevant videos',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  }
241
 
242
  # add lower case keys to match some beir names
@@ -629,9 +685,14 @@ def main(args):
629
  elif args.task == 'cmteb':
630
  task_names = CMTEB_TASK_LIST
631
  lang = ['zh','zh-CN']
 
 
 
 
 
632
  else:
633
  task_names = [args.task]
634
- lang = ['en','zh','zh-CN']
635
  for task in task_names:
636
  evaluation = MTEB(tasks=[task], task_langs=lang)
637
  task_cls = evaluation.tasks[0]
 
119
  'T2Retrieval', 'MMarcoRetrieval', 'DuRetrieval', 'CovidRetrieval', 'CmedqaRetrieval', 'EcomRetrieval', 'MedicalRetrieval', 'VideoRetrieval',
120
  'ATEC', 'BQ', 'LCQMC', 'PAWSX', 'STSB', 'AFQMC', 'QBQTC', 'STS22']
121
 
122
+ MTEB_PL = [
123
+ "CBD","PolEmo2.0-IN","PolEmo2.0-OUT","AllegroReviews","PAC","MassiveIntentClassification","MassiveScenarioClassification",
124
+ "SICK-E-PL","PPC","CDSC-E","PSC","8TagsClustering","SICK-R-PL","CDSC-R","STS22",
125
+ "ArguAna-PL","DBPedia-PL","FiQA-PL","HotpotQA-PL","MSMARCO-PL","NFCorpus-PL","NQ-PL","Quora-PL","SCIDOCS-PL","SciFact-PL","TRECCOVID-PL"
126
+ ]
127
 
128
+ MTEB_FR = [
129
+ "AmazonReviewsClassification","MasakhaNEWSClassification","MassiveIntentClassification",
130
+ "MassiveScenarioClassification","MTOPDomainClassification","MTOPIntentClassification","OpusparcusPC","PawsX",
131
+ "AlloProfClusteringP2P","AlloProfClusteringS2S","HALClusteringS2S","MasakhaNEWSClusteringP2P","MasakhaNEWSClusteringS2S","MLSUMClusteringP2P","MLSUMClusteringS2S",
132
+ "SyntecReranking","AlloprofReranking","AlloprofRetrieval","BSARDRetrieval","SyntecRetrieval","XPQARetrieval","MintakaRetrieval",
133
+ "SummEvalFr","STSBenchmarkMultilingualSTS","STS22","SICKFr"
134
+ ]
135
 
136
  logging.basicConfig(
137
  level=logging.INFO,
 
148
 
149
  def get_task_def_by_task_name_and_type(task_name: str, task_type: str, default_instruct='Given a web search query, retrieve relevant passages that answer the query') -> str:
150
  if task_type in ['STS']:
 
151
  return "Retrieve semantically similar text"
152
 
153
  if task_type in ['Summarization']:
 
177
  'JDReview': 'Classify the customer review for iPhone on e-commerce platform into positive or negative',
178
  'OnlineShopping': 'Classify the customer review for online shopping into positive or negative',
179
  'Waimai': 'Classify the customer review from a food takeaway platform into positive or negative',
180
+ # MTEB-pl eval instructions
181
+ "CBD":"Classify the sentiment of polish tweet reviews",
182
+ "PolEmo2.0-IN": "Classify the sentiment of in-domain (medicine and hotels) online reviews",
183
+ "PolEmo2.0-OUT":"Classify the sentiment of out-of-domain (products and school) online reviews",
184
+ "AllegroReviews": "Classify the sentiment of reviews from e-commerce marketplace Allegro",
185
+ "PAC": "Classify the sentence into one of the two types: \"BEZPIECZNE_POSTANOWIENIE_UMOWNE\" and \"KLAUZULA_ABUZYWNA\"",
186
+
187
  }
188
  return task_name_to_instruct[task_name]
189
 
 
205
  'CLSClusteringP2P': 'Identify the main category of scholar papers based on the titles and abstracts',
206
  'ThuNewsClusteringS2S': 'Identify the topic or theme of the given news articles based on the titles',
207
  'ThuNewsClusteringP2P': 'Identify the topic or theme of the given news articles based on the titles and contents',
208
+ # MTEB-fr eval instructions
209
+ "AlloProfClusteringP2P": "Identify the main category of Allo Prof document based on the titles and descriptions",
210
+ "AlloProfClusteringS2S": "Identify the main category of Allo Prof document based on the titles",
211
+ "HALClusteringS2S": "Identify the main category of academic passage based on the titles and contents",
212
+ "MasakhaNEWSClusteringP2P": "Identify the topic or theme of the given news articles based on the titles and contents",
213
+ "MasakhaNEWSClusteringS2S": "Identify the topic or theme of the given news articles based on the titles",
214
+ "MLSUMClusteringP2P": "Identify the topic or theme of the given articles based on the titles and contents",
215
+ "MLSUMClusteringS2S": "Identify the topic or theme of the given articles based on the titles",
216
+ # MTEB-pl eval instructions
217
+ "8TagsClustering": "Identify of headlines from social media posts in Polish into 8 categories: film, history, food, medicine, motorization, work, sport and technology",
218
  }
219
  return task_name_to_instruct[task_name]
220
 
 
234
  'CMedQAv2': 'Given a Chinese community medical question, retrieve replies that best answer the question',
235
  'Ocnli': 'Retrieve semantically similar text.',
236
  'Cmnli': 'Retrieve semantically similar text.',
237
+ # MTEB-fr eval instructions
238
+ "AlloprofReranking": "Given a question, retrieve passages that answer the question",
239
+ "OpusparcusPC":"Retrieve semantically similar text",
240
+ "PawsX":"Retrieve semantically similar text",
241
+ "SyntecReranking": "Given a question, retrieve passages that answer the question",
242
+ # MTEB-pl eval instructions
243
+ "SICK-E-PL": "Retrieve semantically similar text",
244
+ "PPC": "Retrieve semantically similar text",
245
+ "CDSC-E": "Retrieve semantically similar text",
246
+ "PSC": "Retrieve semantically similar text",
247
  }
248
  return task_name_to_instruct[task_name]
249
 
 
275
  'EcomRetrieval': 'Given a user query from an e-commerce website, retrieve description sentences of relevant products',
276
  'MedicalRetrieval': 'Given a medical question, retrieve user replies that best answer the question',
277
  'VideoRetrieval': 'Given a video search query, retrieve the titles of relevant videos',
278
+ # MTEB-fr eval instructions
279
+ "AlloprofRetrieval": "Given a question, retrieve passages that answer the question",
280
+ "BSARDRetrieval": "Given a question, retrieve passages that answer the question",
281
+ "SyntecRetrieval": "Given a question, retrieve passages that answer the question",
282
+ "XPQARetrieval": "Given a question, retrieve passages that answer the question",
283
+ "MintakaRetrieval": "Given a question, retrieve passages that answer the question",
284
+ # MTEB-pl eval instructions
285
+ "ArguAna-PL": "Given a claim, find documents that refute the claim",
286
+ "DBPedia-PL": "Given a query, retrieve relevant entity descriptions from DBPedia",
287
+ "FiQA-PL": "Given a financial question, retrieve user replies that best answer the question",
288
+ "HotpotQA-PL": "Given a multi-hop question, retrieve documents that can help answer the question",
289
+ "MSMARCO-PL": "Given a web search query, retrieve relevant passages that answer the query",
290
+ "NFCorpus-PL": "Given a question, retrieve relevant documents that best answer the question",
291
+ "NQ-PL": "Given a question, retrieve Wikipedia passages that answer the question",
292
+ "Quora-PL": "Given a question, retrieve questions that are semantically equivalent to the given question",
293
+ "SCIDOCS-PL": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper",
294
+ "SciFact-PL": "Given a scientific claim, retrieve documents that support or refute the claim",
295
+ "TRECCOVID-PL": "Given a query on COVID-19, retrieve documents that answer the query"
296
  }
297
 
298
  # add lower case keys to match some beir names
 
685
  elif args.task == 'cmteb':
686
  task_names = CMTEB_TASK_LIST
687
  lang = ['zh','zh-CN']
688
+ elif args.task == 'mteb-fr':
689
+ task_names = MTEB_FR
690
+ lang = ['fr']
691
+ elif args.task == 'mteb-pl':
692
+ task_names, lang = MTEB_PL, ['pl']
693
  else:
694
  task_names = [args.task]
695
+ lang = ['en','zh','zh-CN','pl','fr']
696
  for task in task_names:
697
  evaluation = MTEB(tasks=[task], task_langs=lang)
698
  task_cls = evaluation.tasks[0]