ronig committed on
Commit
790c285
·
1 Parent(s): 2d194fe

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +1 -0
  2. tokenizer_config.json +1 -0
tokenizer.json CHANGED
@@ -66,6 +66,7 @@
66
  "continuing_subword_prefix": null,
67
  "end_of_word_suffix": null,
68
  "fuse_unk": false,
 
69
  "vocab": {
70
  "<unk>": 0,
71
  "<pad>": 1,
 
66
  "continuing_subword_prefix": null,
67
  "end_of_word_suffix": null,
68
  "fuse_unk": false,
69
+ "byte_fallback": false,
70
  "vocab": {
71
  "<unk>": 0,
72
  "<pad>": 1,
tokenizer_config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "model_max_length": 1000000000000000019884624838656,
3
  "pad_token": "<pad>",
4
  "tokenizer_class": "PreTrainedTokenizerFast"
 
1
  {
2
+ "clean_up_tokenization_spaces": true,
3
  "model_max_length": 1000000000000000019884624838656,
4
  "pad_token": "<pad>",
5
  "tokenizer_class": "PreTrainedTokenizerFast"