ayjays132 committed on
Commit
47b64a7
·
verified ·
1 Parent(s): 86b2f20

Update tokenizer_config.json

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +16 -3
tokenizer_config.json CHANGED
@@ -1050,8 +1050,21 @@
1050
  "clean_up_tokenization_spaces": true,
1051
  "eos_token": "</s>",
1052
  "extra_ids": 100,
1053
- "model_max_length": 512,
 
1054
  "pad_token": "<pad>",
 
 
1055
  "tokenizer_class": "T5Tokenizer",
1056
- "unk_token": "<unk>"
1057
- }
 
 
 
 
 
 
 
 
 
 
 
1050
  "clean_up_tokenization_spaces": true,
1051
  "eos_token": "</s>",
1052
  "extra_ids": 100,
1053
+ "max_length": 1024,
1054
+ "model_max_length": 1024,
1055
  "pad_token": "<pad>",
1056
+ "pad_token_type_id": 0,
1057
+ "padding_side": "right",
1058
  "tokenizer_class": "T5Tokenizer",
1059
+ "unk_token": "<unk>",
1060
+ "enable_token_classification": true,
1061
+ "normalization_rules": {
1062
+ "enable": true,
1063
+ "lowercase": true,
1064
+ "strip_accents": true,
1065
+ "pre_tokenization": {
1066
+ "punctuation_split": true,
1067
+ "split_digits": true
1068
+ }
1069
+ }
1070
+ }