Muennighoff committed on
Commit
4f1ef5f
·
1 Parent(s): 091482a

Update dataset

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -58,11 +58,11 @@ TASK_LIST_CLASSIFICATION_NB = [
58
  ]
59
 
60
  TASK_LIST_CLASSIFICATION_PL = [
61
- "AbusiveClauses",
62
  "AllegroReviews",
63
  "CBD",
64
  "MassiveIntentClassification (pl)",
65
  "MassiveScenarioClassification (pl)",
 
66
  "PolEmo2.0-IN",
67
  "PolEmo2.0-OUT",
68
  ]
@@ -304,6 +304,7 @@ EXTERNAL_MODELS = [
304
  "gtr-t5-large",
305
  "gtr-t5-xl",
306
  "gtr-t5-xxl",
 
307
  "komninos",
308
  "luotuo-bert-medium",
309
  "LASER2",
@@ -379,6 +380,7 @@ EXTERNAL_MODEL_TO_LINK = {
379
  "gtr-t5-large": "https://huggingface.co/sentence-transformers/gtr-t5-large",
380
  "gtr-t5-xl": "https://huggingface.co/sentence-transformers/gtr-t5-xl",
381
  "gtr-t5-xxl": "https://huggingface.co/sentence-transformers/gtr-t5-xxl",
 
382
  "komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos",
383
  "luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium",
384
  "LASER2": "https://github.com/facebookresearch/LASER",
@@ -457,6 +459,7 @@ EXTERNAL_MODEL_TO_DIM = {
457
  "gtr-t5-large": 768,
458
  "gtr-t5-xl": 768,
459
  "gtr-t5-xxl": 768,
 
460
  "komninos": 300,
461
  "m3e-base": 768,
462
  "m3e-large": 768,
@@ -529,6 +532,7 @@ EXTERNAL_MODEL_TO_SEQLEN = {
529
  "gtr-t5-large": 512,
530
  "gtr-t5-xl": 512,
531
  "gtr-t5-xxl": 512,
 
532
  "komninos": "N/A",
533
  "luotuo-bert-medium": 512,
534
  "LASER2": "N/A",
@@ -604,6 +608,7 @@ EXTERNAL_MODEL_TO_SIZE = {
604
  "gtr-t5-large": 0.67,
605
  "gtr-t5-xl": 2.48,
606
  "gtr-t5-xxl": 9.73,
 
607
  "komninos": 0.27,
608
  "luotuo-bert-medium": 1.31,
609
  "LASER2": 0.17,
@@ -810,7 +815,6 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
810
  # ],
811
  # },
812
  # Use "get" instead of dict indexing to skip incompat metadata instead of erroring out
813
- print("RUNNING", model)
814
  if len(datasets) > 0:
815
  task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and any([x in sub_res.get("dataset", {}).get("name", "") for x in datasets])]
816
  elif langs:
 
58
  ]
59
 
60
  TASK_LIST_CLASSIFICATION_PL = [
 
61
  "AllegroReviews",
62
  "CBD",
63
  "MassiveIntentClassification (pl)",
64
  "MassiveScenarioClassification (pl)",
65
+ "PAC",
66
  "PolEmo2.0-IN",
67
  "PolEmo2.0-OUT",
68
  ]
 
304
  "gtr-t5-large",
305
  "gtr-t5-xl",
306
  "gtr-t5-xxl",
307
+ "herbert-base-retrieval-v2",
308
  "komninos",
309
  "luotuo-bert-medium",
310
  "LASER2",
 
380
  "gtr-t5-large": "https://huggingface.co/sentence-transformers/gtr-t5-large",
381
  "gtr-t5-xl": "https://huggingface.co/sentence-transformers/gtr-t5-xl",
382
  "gtr-t5-xxl": "https://huggingface.co/sentence-transformers/gtr-t5-xxl",
383
+ "herbert-base-retrieval-v2": "https://huggingface.co/ipipan/herbert-base-retrieval-v2",
384
  "komninos": "https://huggingface.co/sentence-transformers/average_word_embeddings_komninos",
385
  "luotuo-bert-medium": "https://huggingface.co/silk-road/luotuo-bert-medium",
386
  "LASER2": "https://github.com/facebookresearch/LASER",
 
459
  "gtr-t5-large": 768,
460
  "gtr-t5-xl": 768,
461
  "gtr-t5-xxl": 768,
462
+ "herbert-base-retrieval-v2": 768,
463
  "komninos": 300,
464
  "m3e-base": 768,
465
  "m3e-large": 768,
 
532
  "gtr-t5-large": 512,
533
  "gtr-t5-xl": 512,
534
  "gtr-t5-xxl": 512,
535
+ "herbert-base-retrieval-v2": 514,
536
  "komninos": "N/A",
537
  "luotuo-bert-medium": 512,
538
  "LASER2": "N/A",
 
608
  "gtr-t5-large": 0.67,
609
  "gtr-t5-xl": 2.48,
610
  "gtr-t5-xxl": 9.73,
611
+ "herbert-base-retrieval-v2": 0.50,
612
  "komninos": 0.27,
613
  "luotuo-bert-medium": 1.31,
614
  "LASER2": 0.17,
 
815
  # ],
816
  # },
817
  # Use "get" instead of dict indexing to skip incompat metadata instead of erroring out
 
818
  if len(datasets) > 0:
819
  task_results = [sub_res for sub_res in meta["model-index"][0]["results"] if (sub_res.get("task", {}).get("type", "") in tasks) and any([x in sub_res.get("dataset", {}).get("name", "") for x in datasets])]
820
  elif langs: