|
{ |
|
"version": "1.0", |
|
"truncation": { |
|
"direction": "Right", |
|
"max_length": 64, |
|
"strategy": "LongestFirst", |
|
"stride": 0 |
|
}, |
|
"padding": { |
|
"strategy": { |
|
"Fixed": 64 |
|
}, |
|
"direction": "Right", |
|
"pad_to_multiple_of": null, |
|
"pad_id": 0, |
|
"pad_type_id": 0, |
|
"pad_token": "[PAD]" |
|
}, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"content": "[PAD]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 1, |
|
"content": "[UNK]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 2, |
|
"content": "[CLS]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 3, |
|
"content": "[SEP]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 4, |
|
"content": "[MASK]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
} |
|
], |
|
"normalizer": { |
|
"type": "BertNormalizer", |
|
"clean_text": true, |
|
"handle_chinese_chars": true, |
|
"strip_accents": null, |
|
"lowercase": true |
|
}, |
|
"pre_tokenizer": { |
|
"type": "BertPreTokenizer" |
|
}, |
|
"post_processor": { |
|
"type": "TemplateProcessing", |
|
"single": [ |
|
{ |
|
"SpecialToken": { |
|
"id": "[CLS]", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "A", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "[SEP]", |
|
"type_id": 0 |
|
} |
|
} |
|
], |
|
"pair": [ |
|
{ |
|
"SpecialToken": { |
|
"id": "[CLS]", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "A", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "[SEP]", |
|
"type_id": 0 |
|
} |
|
}, |
|
{ |
|
"Sequence": { |
|
"id": "B", |
|
"type_id": 1 |
|
} |
|
}, |
|
{ |
|
"SpecialToken": { |
|
"id": "[SEP]", |
|
"type_id": 1 |
|
} |
|
} |
|
], |
|
"special_tokens": { |
|
"[CLS]": { |
|
"id": "[CLS]", |
|
"ids": [ |
|
2 |
|
], |
|
"tokens": [ |
|
"[CLS]" |
|
] |
|
}, |
|
"[SEP]": { |
|
"id": "[SEP]", |
|
"ids": [ |
|
3 |
|
], |
|
"tokens": [ |
|
"[SEP]" |
|
] |
|
} |
|
} |
|
}, |
|
"decoder": { |
|
"type": "WordPiece", |
|
"prefix": "##", |
|
"cleanup": true |
|
}, |
|
"model": { |
|
"type": "WordPiece", |
|
"unk_token": "[UNK]", |
|
"continuing_subword_prefix": "##", |
|
"max_input_chars_per_word": 100, |
|
"vocab": { |
|
"[PAD]": 0, |
|
"[UNK]": 1, |
|
"[CLS]": 2, |
|
"[SEP]": 3, |
|
"[MASK]": 4, |
|
"http": 5, |
|
"https": 6, |
|
"www": 7, |
|
"/": 8, |
|
".": 9, |
|
":": 10, |
|
"&": 11, |
|
"?": 12, |
|
"-": 13, |
|
"_": 14, |
|
"%": 15, |
|
"##0": 16, |
|
"##1": 17, |
|
"##2": 18, |
|
"##3": 19, |
|
"##4": 20, |
|
"##5": 21, |
|
"##6": 22, |
|
"##7": 23, |
|
"##8": 24, |
|
"##9": 25, |
|
"0": 26, |
|
"1": 27, |
|
"2": 28, |
|
"3": 29, |
|
"4": 30, |
|
"5": 31, |
|
"6": 32, |
|
"7": 33, |
|
"8": 34, |
|
"9": 35, |
|
"z": 36, |
|
"y": 37, |
|
"x": 38, |
|
"w": 39, |
|
"v": 40, |
|
"u": 41, |
|
"t": 42, |
|
"s": 43, |
|
"r": 44, |
|
"q": 45, |
|
"p": 46, |
|
"o": 47, |
|
"n": 48, |
|
"m": 49, |
|
"l": 50, |
|
"k": 51, |
|
"j": 52, |
|
"i": 53, |
|
"h": 54, |
|
"g": 55, |
|
"f": 56, |
|
"e": 57, |
|
"d": 58, |
|
"c": 59, |
|
"b": 60, |
|
"a": 61, |
|
"##z": 62, |
|
"##y": 63, |
|
"##x": 64, |
|
"##w": 65, |
|
"##v": 66, |
|
"##u": 67, |
|
"##t": 68, |
|
"##s": 69, |
|
"##r": 70, |
|
"##q": 71, |
|
"##p": 72, |
|
"##o": 73, |
|
"##n": 74, |
|
"##m": 75, |
|
"##l": 76, |
|
"##k": 77, |
|
"##j": 78, |
|
"##i": 79, |
|
"##h": 80, |
|
"##g": 81, |
|
"##f": 82, |
|
"##e": 83, |
|
"##d": 84, |
|
"##c": 85, |
|
"##b": 86, |
|
"##a": 87, |
|
"##ing": 88, |
|
"##ly": 89, |
|
"##er": 90, |
|
"##in": 91, |
|
"##tion": 92, |
|
"##re": 93, |
|
"##un": 94, |
|
"##ed": 95, |
|
"##al": 96, |
|
"##ter": 97, |
|
"##de": 98, |
|
"##con": 99, |
|
"##an": 100, |
|
"##ti": 101, |
|
"##ic": 102, |
|
"##cal": 103, |
|
"##to": 104, |
|
"##ty": 105, |
|
"##ness": 106, |
|
"##ta": 107, |
|
"##di": 108, |
|
"##la": 109, |
|
"##en": 110, |
|
"##es": 111, |
|
"##ma": 112, |
|
"##per": 113, |
|
"##man": 114, |
|
"##ri": 115, |
|
"##na": 116, |
|
"##ca": 117, |
|
"##ex": 118, |
|
"##dis": 119, |
|
"##ra": 120, |
|
"##ers": 121, |
|
"##non": 122, |
|
"##tions": 123, |
|
"##com": 124, |
|
"##ni": 125, |
|
"##co": 126, |
|
"##pro": 127, |
|
"##tive": 128, |
|
"##mi": 129, |
|
"##pre": 130, |
|
"##der": 131, |
|
"##sub": 132, |
|
"##able": 133, |
|
"##tor": 134, |
|
"##li": 135, |
|
"##si": 136, |
|
"##hy": 137, |
|
"##mo": 138, |
|
"##men": 139, |
|
"##ar": 140, |
|
"##im": 141, |
|
"##ton": 142, |
|
"##sis": 143, |
|
"##tic": 144, |
|
"##da": 145, |
|
"##at": 146, |
|
"##ci": 147, |
|
"##or": 148, |
|
"##lar": 149, |
|
"##car": 150, |
|
"##ment": 151, |
|
"##lo": 152, |
|
"##ac": 153, |
|
"##cy": 154, |
|
"##tu": 155, |
|
"##less": 156, |
|
"##as": 157, |
|
"##um": 158, |
|
"##pa": 159, |
|
"##tal": 160, |
|
"##ry": 161, |
|
"##ro": 162, |
|
"##fi": 163, |
|
"##over": 164, |
|
"##po": 165, |
|
"##is": 166, |
|
"##son": 167, |
|
"##so": 168, |
|
"##do": 169, |
|
"##cu": 170, |
|
"##bi": 171, |
|
"##be": 172, |
|
"##tri": 173, |
|
"##ful": 174, |
|
"##vi": 175, |
|
"##mis": 176, |
|
"##su": 177, |
|
"##va": 178, |
|
"##ous": 179, |
|
"ftp": 180, |
|
"tel": 181, |
|
"file": 182, |
|
"ws": 183, |
|
"wss": 184, |
|
"ssh": 185, |
|
"ldaps": 186, |
|
"gopher": 187, |
|
"view": 188, |
|
"source": 189, |
|
"about": 190, |
|
"chrome": 191, |
|
"data": 192, |
|
"irc": 193, |
|
"magnet": 194, |
|
"mms": 195, |
|
"redis": 196, |
|
"svn": 197, |
|
"vnc": 198, |
|
"dns": 199, |
|
"ntp": 200, |
|
"ip": 201, |
|
"com": 202, |
|
"de": 203, |
|
"net": 204, |
|
"uk": 205, |
|
"cn": 206, |
|
"org": 207, |
|
"info": 208, |
|
"nl": 209, |
|
"eu": 210, |
|
"ru": 211, |
|
"su": 212, |
|
"br": 213, |
|
"htm": 214, |
|
"php": 215, |
|
"co": 216, |
|
"ly": 217, |
|
"bit": 218, |
|
"log": 219, |
|
"index": 220, |
|
"bank": 221, |
|
"za": 222, |
|
"direct": 223, |
|
"mail": 224, |
|
"it": 225, |
|
"run": 226, |
|
"security": 227, |
|
"code": 228, |
|
"promo": 229, |
|
"jpg": 230, |
|
"img": 231, |
|
"pay": 232, |
|
"form": 233, |
|
"docs": 234, |
|
"host": 235, |
|
"ec": 236, |
|
"cx": 237, |
|
"free": 238, |
|
"true": 239, |
|
"amp": 240, |
|
"blog": 241, |
|
"key": 242, |
|
"pal": 243, |
|
"contact": 244, |
|
"online": 245, |
|
"abc": 246, |
|
"media": 247, |
|
"admin": 248, |
|
"etc": 249, |
|
"login": 250, |
|
"cmd": 251, |
|
"bin": 252, |
|
"web": 253, |
|
"verif": 254, |
|
"the": 255, |
|
"in": 256, |
|
"of": 257, |
|
"la": 258, |
|
"en": 259, |
|
"and": 260, |
|
"to": 261, |
|
"der": 262, |
|
"un": 263, |
|
"di": 264, |
|
"que": 265, |
|
"is": 266, |
|
"el": 267, |
|
"se": 268, |
|
"del": 269, |
|
"die": 270, |
|
"und": 271, |
|
"et": 272, |
|
"na": 273, |
|
"was": 274, |
|
"on": 275, |
|
"des": 276, |
|
"den": 277, |
|
"le": 278, |
|
"for": 279, |
|
"da": 280, |
|
"je": 281, |
|
"van": 282, |
|
"as": 283, |
|
"sa": 284, |
|
"do": 285, |
|
"an": 286, |
|
"les": 287, |
|
"una": 288, |
|
"il": 289, |
|
"by": 290, |
|
"og": 291, |
|
"at": 292, |
|
"er": 293, |
|
"al": 294, |
|
"von": 295, |
|
"du": 296, |
|
"av": 297, |
|
"med": 298, |
|
"con": 299, |
|
"est": 300, |
|
"per": 301, |
|
"som": 302, |
|
"los": 303, |
|
"por": 304, |
|
"from": 305, |
|
"that": 306, |
|
"no": 307, |
|
"11": 308, |
|
"es": 309, |
|
"ja": 310, |
|
"km": 311, |
|
"om": 312, |
|
"im": 313, |
|
"dan": 314, |
|
"para": 315, |
|
"mit": 316, |
|
"El": 317, |
|
"his": 318, |
|
"ha": 319, |
|
"une": 320, |
|
"das": 321, |
|
"par": 322, |
|
"au": 323, |
|
"dans": 324, |
|
"he": 325, |
|
"che": 326, |
|
"em": 327, |
|
"dem": 328, |
|
"til": 329, |
|
"се": 330, |
|
"han": 331, |
|
"las": 332, |
|
"della": 333, |
|
"new": 334, |
|
"um": 335, |
|
"si": 336, |
|
"var": 337, |
|
"are": 338, |
|
"op": 339, |
|
"zu": 340, |
|
"were": 341, |
|
"od": 342, |
|
"son": 343, |
|
"which": 344, |
|
"va": 345, |
|
"pour": 346, |
|
"ve": 347, |
|
"sur": 348, |
|
"war": 349, |
|
"be": 350, |
|
"det": 351, |
|
"gov": 352, |
|
"qui": 353, |
|
"az": 354, |
|
"te": 355, |
|
"had": 356, |
|
"also": 357, |
|
"so": 358, |
|
"am": 359, |
|
"has": 360, |
|
"dos": 361, |
|
"ur": 362, |
|
"entre": 363, |
|
"lo": 364, |
|
"era": 365, |
|
"ni": 366, |
|
"first": 367, |
|
"os": 368, |
|
"met": 369, |
|
"ou": 370, |
|
"all": 371, |
|
"aus": 372, |
|
"non": 373, |
|
"film": 374, |
|
"po": 375, |
|
"into": 376, |
|
"till": 377, |
|
"ble": 378, |
|
"ka": 379, |
|
"mai": 380, |
|
"up": 381, |
|
"ng": 382, |
|
"aux": 383, |
|
"ad": 384, |
|
"ki": 385, |
|
"me": 386, |
|
"ze": 387, |
|
"can": 388, |
|
"out": 389, |
|
"wie": 390, |
|
"со": 391, |
|
"fu": 392, |
|
"vom": 393, |
|
"nu": 394, |
|
"club": 395, |
|
"team": 396, |
|
"ca": 397, |
|
"pe": 398, |
|
"ke": 399 |
|
} |
|
} |
|
} |