ronig
/

pdb_bpe_tokenizer_1024_mlm

Model card · Files and versions · Community

ronig committed on Jun 20, 2023

Commit

790c285

·

1 Parent(s): 2d194fe

Upload tokenizer

Files changed (2) hide show

tokenizer.json +1 -0
tokenizer_config.json +1 -0

tokenizer.json CHANGED Viewed

@@ -66,6 +66,7 @@
     "continuing_subword_prefix": null,
     "end_of_word_suffix": null,
     "fuse_unk": false,
     "vocab": {
       "<unk>": 0,
       "<pad>": 1,

     "continuing_subword_prefix": null,
     "end_of_word_suffix": null,
     "fuse_unk": false,
+    "byte_fallback": false,
     "vocab": {
       "<unk>": 0,
       "<pad>": 1,

tokenizer_config.json CHANGED Viewed

@@ -1,4 +1,5 @@
 {
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
   "tokenizer_class": "PreTrainedTokenizerFast"

 {
+  "clean_up_tokenization_spaces": true,
   "model_max_length": 1000000000000000019884624838656,
   "pad_token": "<pad>",
   "tokenizer_class": "PreTrainedTokenizerFast"