Add SetFit model

Browse files

Files changed (9) hide show

added_tokens.json +1 -0
config.json +1 -1
config_sentence_transformers.json +2 -2
model_head.pkl +2 -2
modules.json +6 -0
pytorch_model.bin +2 -2
sentence_bert_config.json +1 -1
tokenizer.json +19 -78
tokenizer_config.json +15 -2

added_tokens.json CHANGED Viewed

@@ -3,5 +3,6 @@
   "<mask>": 30526,
   "<pad>": 1,
   "<s>": 0,
   "[UNK]": 104
 }

   "<mask>": 30526,
   "<pad>": 1,
   "<s>": 0,
+  "<unk>": 3,
   "[UNK]": 104
 }

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "/root/.cache/torch/sentence_transformers/sentence-transformers_paraphrase-mpnet-base-v2/",
   "architectures": [
     "MPNetModel"
   ],

 {
+  "_name_or_path": "/root/.cache/torch/sentence_transformers/sentence-transformers_all-mpnet-base-v2/",
   "architectures": [
     "MPNetModel"
   ],

config_sentence_transformers.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "__version__": {
     "sentence_transformers": "2.0.0",
-    "transformers": "4.7.0",
-    "pytorch": "1.9.0+cu102"
   }
 }

 {
   "__version__": {
     "sentence_transformers": "2.0.0",
+    "transformers": "4.6.1",
+    "pytorch": "1.8.1"
   }
 }

model_head.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:760b8bd405434c9b563219b7b4362375d686c38cf9c042d62ab889280944da4e
-size 6991

 version https://git-lfs.github.com/spec/v1
+oid sha256:dbe1b7dcdc2482ce048e4d02af7f45e7f133f9aa6030f05f3782a89999438006
+size 16267

modules.json CHANGED Viewed

@@ -10,5 +10,11 @@
     "name": "1",
     "path": "1_Pooling",
     "type": "sentence_transformers.models.Pooling"
   }
 ]

     "name": "1",
     "path": "1_Pooling",
     "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
   }
 ]

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c64b1cc4a56b07faa461c7564cf893ed9c7e821bf13325499d27d8ac8f706bab
-size 438009257

 version https://git-lfs.github.com/spec/v1
+oid sha256:89512a7c06eb96f01e31cf2738e5f211fa6f105f40d5c8c830f70c6e6a865a87
+size 438012073

sentence_bert_config.json CHANGED Viewed

@@ -1,4 +1,4 @@
 {
-  "max_seq_length": 512,
   "do_lower_case": false
 }

 {
+  "max_seq_length": 384,
   "do_lower_case": false
 }

tokenizer.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "version": "1.0",
   "truncation": {
     "direction": "Right",
-    "max_length": 512,
     "strategy": "LongestFirst",
     "stride": 0
   },
@@ -42,6 +42,15 @@
       "normalized": false,
       "special": true
     },
     {
       "id": 104,
       "content": "[UNK]",
@@ -72,85 +81,17 @@
     "type": "BertPreTokenizer"
   },
   "post_processor": {
-    "type": "TemplateProcessing",
-    "single": [
-      {
-        "SpecialToken": {
-          "id": "<s>",
-          "type_id": 0
-        }
-      },
-      {
-        "Sequence": {
-          "id": "A",
-          "type_id": 0
-        }
-      },
-      {
-        "SpecialToken": {
-          "id": "</s>",
-          "type_id": 0
-        }
-      }
     ],
-    "pair": [
-      {
-        "SpecialToken": {
-          "id": "<s>",
-          "type_id": 0
-        }
-      },
-      {
-        "Sequence": {
-          "id": "A",
-          "type_id": 0
-        }
-      },
-      {
-        "SpecialToken": {
-          "id": "</s>",
-          "type_id": 0
-        }
-      },
-      {
-        "SpecialToken": {
-          "id": "</s>",
-          "type_id": 0
-        }
-      },
-      {
-        "Sequence": {
-          "id": "B",
-          "type_id": 1
-        }
-      },
-      {
-        "SpecialToken": {
-          "id": "</s>",
-          "type_id": 1
-        }
-      }
     ],
-    "special_tokens": {
-      "</s>": {
-        "id": "</s>",
-        "ids": [
-          2
-        ],
-        "tokens": [
-          "</s>"
-        ]
-      },
-      "<s>": {
-        "id": "<s>",
-        "ids": [
-          0
-        ],
-        "tokens": [
-          "<s>"
-        ]
-      }
-    }
   },
   "decoder": {
     "type": "WordPiece",

   "version": "1.0",
   "truncation": {
     "direction": "Right",
+    "max_length": 384,
     "strategy": "LongestFirst",
     "stride": 0
   },
       "normalized": false,
       "special": true
     },
+    {
+      "id": 3,
+      "content": "<unk>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": true
+    },
     {
       "id": 104,
       "content": "[UNK]",
     "type": "BertPreTokenizer"
   },
   "post_processor": {
+    "type": "RobertaProcessing",
+    "sep": [
+      "</s>",
+      2
     ],
+    "cls": [
+      "<s>",
+      0
     ],
+    "trim_offsets": true,
+    "add_prefix_space": false
   },
   "decoder": {
     "type": "WordPiece",

tokenizer_config.json CHANGED Viewed

@@ -24,6 +24,14 @@
       "single_word": false,
       "special": true
     },
     "104": {
       "content": "[UNK]",
       "lstrip": false,
@@ -45,16 +53,21 @@
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
   "cls_token": "<s>",
-  "do_basic_tokenize": true,
   "do_lower_case": true,
   "eos_token": "</s>",
   "mask_token": "<mask>",
   "model_max_length": 512,
-  "never_split": null,
   "pad_token": "<pad>",
   "sep_token": "</s>",
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "MPNetTokenizer",
   "unk_token": "[UNK]"
 }

       "single_word": false,
       "special": true
     },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
     "104": {
       "content": "[UNK]",
       "lstrip": false,
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
   "cls_token": "<s>",
   "do_lower_case": true,
   "eos_token": "</s>",
   "mask_token": "<mask>",
+  "max_length": 128,
   "model_max_length": 512,
+  "pad_to_multiple_of": null,
   "pad_token": "<pad>",
+  "pad_token_type_id": 0,
+  "padding_side": "right",
   "sep_token": "</s>",
+  "stride": 0,
   "strip_accents": null,
   "tokenize_chinese_chars": true,
   "tokenizer_class": "MPNetTokenizer",
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "[UNK]"
 }