dongjunguo committed on
Commit 4b09a69 · verified · 1 Parent(s): 6c05712

Upload tokenizer

Files changed (3)
  1. special_tokens_map.json +7 -0
  2. tokenizer_config.json +16 -0
  3. vocab.txt +28 -0
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+ "cls_token": "[CLS]",
+ "mask_token": "[MASK]",
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "unk_token": "[UNK]"
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+ {
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "[CLS]",
+ "do_basic_tokenize": true,
+ "do_lower_case": false,
+ "full_tokenizer_file": null,
+ "mask_token": "[MASK]",
+ "model_max_length": 1000000000000000019884624838656,
+ "never_split": null,
+ "pad_token": "[PAD]",
+ "sep_token": "[SEP]",
+ "strip_accents": null,
+ "tokenize_chinese_chars": true,
+ "tokenizer_class": "RoFormerTokenizer",
+ "unk_token": "[UNK]"
+ }
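A minimal sketch of how this configuration is typically consumed (an illustration, not part of the commit): assuming the three uploaded files sit together in a local directory (the path "./tokenizer" below is a placeholder) and that transformers plus rjieba (which RoFormerTokenizer's word-level pre-tokenization pulls in) are installed, AutoTokenizer reads the "tokenizer_class" field from tokenizer_config.json and instantiates that class.

```python
from transformers import AutoTokenizer

# "./tokenizer" is a placeholder for a local directory holding the three files
# added in this commit: special_tokens_map.json, tokenizer_config.json, vocab.txt.
# AutoTokenizer dispatches on "tokenizer_class": "RoFormerTokenizer".
tok = AutoTokenizer.from_pretrained("./tokenizer")

print(type(tok).__name__)                            # RoFormerTokenizer (or its fast variant)
print(tok.cls_token, tok.sep_token, tok.pad_token)   # [CLS] [SEP] [PAD]
```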
vocab.txt ADDED
@@ -0,0 +1,28 @@
+ [PAD]
+ [CLS]
+ [SEP]
+ [MASK]
+ [UNK]
+ A
+ C
+ D
+ E
+ F
+ G
+ H
+ I
+ K
+ L
+ M
+ N
+ P
+ Q
+ R
+ S
+ T
+ V
+ W
+ Y
+ X
+
+
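The vocabulary is one token per line: the five special tokens followed by the 20 standard amino-acid letters plus X (conventionally an unknown/any residue), so token ids follow line order ([PAD]=0, [CLS]=1, ..., A=5, and so on). A minimal pure-Python sketch of that id mapping, assuming vocab.txt has been downloaded locally and using an arbitrary example protein sequence:

```python
# Illustration only: build the token -> id map from line order in vocab.txt.
with open("vocab.txt", encoding="utf-8") as f:
    vocab = {token: idx for idx, token in enumerate(line.rstrip("\n") for line in f)}

sequence = "MKTAYIAK"      # arbitrary example sequence, one letter per residue
unk_id = vocab["[UNK]"]

# Character-level lookup wrapped with [CLS]/[SEP], mirroring a BERT-style
# single-sequence encoding; residues outside the vocab fall back to [UNK].
ids = [vocab["[CLS]"]] + [vocab.get(aa, unk_id) for aa in sequence] + [vocab["[SEP]"]]
print(ids)
```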