Pendrokar committed on
Commit
d627362
·
verified ·
1 Parent(s): ac7fa10

CLS SEP to ID 0

Browse files
Files changed (1) hide show
  1. tokenizer.json +61 -30
tokenizer.json CHANGED
@@ -24,40 +24,71 @@
24
  {
25
  "id":2230, "special":false,"content":"hashtag","single_word":false,"lstrip":false,"rstrip":false,"normalized":true},
26
  {
27
- "id":61005, "special":false,"content":"emoji","single_word":false,"lstrip":false,"rstrip":false,"normalized":true}],
 
28
  "normalizer":{
29
- "type":"BertNormalizer", "clean_text":true,"handle_chinese_chars":true,"strip_accents":false,"lowercase":false},
 
 
 
 
 
30
  "pre_tokenizer":{
31
  "type":"BertPreTokenizer"
32
  },
33
- "post_processor":{
34
- "type":"TemplateProcessing", "single":[{
35
- "SpecialToken":{
36
- "id":"[CLS]","type_id":0}},
37
- {
38
- "Sequence":{
39
- "id":"A","type_id":0}},
40
- {
41
- "SpecialToken":{
42
- "id":"[SEP]","type_id":0}}],"pair":[{
43
- "SpecialToken":{
44
- "id":"[CLS]","type_id":0}},
45
- {
46
- "Sequence":{
47
- "id":"A","type_id":0}},
48
- {
49
- "SpecialToken":{
50
- "id":"[SEP]","type_id":0}},
51
- {
52
- "Sequence":{
53
- "id":"B","type_id":1}},
54
- {
55
- "SpecialToken":{
56
- "id":"[SEP]","type_id":1}}],"special_tokens":{
57
- "[CLS]":{
58
- "id":"[CLS]", "ids":[4],"tokens":["[CLS]"]},"[SEP]":{
59
- "id":"[SEP]", "ids":[5],"tokens":["[SEP]"]}}},"decoder":{
60
- "type":"WordPiece", "prefix":"##","cleanup":true},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  "model":{
62
  "type":"WordPiece",
63
  "unk_token":"CUSTOM_UNKNOWN",
 
24
  {
25
  "id":2230, "special":false,"content":"hashtag","single_word":false,"lstrip":false,"rstrip":false,"normalized":true},
26
  {
27
+ "id":61005, "special":false,"content":"emoji","single_word":false,"lstrip":false,"rstrip":false,"normalized":true}
28
+ ],
29
  "normalizer":{
30
+ "type":"BertNormalizer",
31
+ "clean_text":true,
32
+ "handle_chinese_chars":true,
33
+ "strip_accents":true,
34
+ "lowercase":true
35
+ },
36
  "pre_tokenizer":{
37
  "type":"BertPreTokenizer"
38
  },
39
+ "post_processor":{
40
+ "type":"TemplateProcessing", "single":[{
41
+ "SpecialToken":{
42
+ "id":"[CLS]","type_id":0}
43
+ },
44
+ {
45
+ "Sequence":{
46
+ "id":"A","type_id":0}},
47
+ {
48
+ "SpecialToken":{
49
+ "id":"[SEP]","type_id":0}
50
+ }],
51
+ "pair":[{
52
+ "SpecialToken":{
53
+ "id":"[CLS]","type_id":0
54
+ }
55
+ },
56
+ {
57
+ "Sequence":{
58
+ "id":"A","type_id":0
59
+ }
60
+ },
61
+ {
62
+ "SpecialToken":{
63
+ "id":"[SEP]","type_id":0
64
+ }
65
+ },
66
+ {
67
+ "Sequence":{
68
+ "id":"B","type_id":1}
69
+ },
70
+ {
71
+ "SpecialToken":{
72
+ "id":"[SEP]","type_id":1
73
+ }
74
+ }
75
+ ],
76
+ "special_tokens":{
77
+ "[CLS]":{
78
+ "id":"[CLS]",
79
+ "ids":[0],
80
+ "tokens":["[CLS]"]
81
+ },
82
+ "[SEP]":{
83
+ "id":"[SEP]",
84
+ "ids":[0],
85
+ "tokens":["[SEP]"]
86
+ }
87
+ }
88
+ },
89
+ "decoder":{
90
+ "type":"WordPiece", "prefix":"##","cleanup":true
91
+ },
92
  "model":{
93
  "type":"WordPiece",
94
  "unk_token":"CUSTOM_UNKNOWN",