{
"version": "1.0",
"truncation": {
"direction": "Right",
"max_length": 64,
"strategy": "LongestFirst",
"stride": 0
},
"padding": {
"strategy": {
"Fixed": 64
},
"direction": "Right",
"pad_to_multiple_of": null,
"pad_id": 0,
"pad_type_id": 0,
"pad_token": "[PAD]"
},
"added_tokens": [
{
"id": 0,
"content": "[PAD]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[CLS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "[SEP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "[MASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": {
"type": "BertNormalizer",
"clean_text": true,
"handle_chinese_chars": true,
"strip_accents": null,
"lowercase": true
},
"pre_tokenizer": {
"type": "BertPreTokenizer"
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
}
],
"pair": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 1
}
}
],
"special_tokens": {
"[CLS]": {
"id": "[CLS]",
"ids": [
2
],
"tokens": [
"[CLS]"
]
},
"[SEP]": {
"id": "[SEP]",
"ids": [
3
],
"tokens": [
"[SEP]"
]
}
}
},
"decoder": {
"type": "WordPiece",
"prefix": "##",
"cleanup": true
},
"model": {
"type": "WordPiece",
"unk_token": "[UNK]",
"continuing_subword_prefix": "##",
"max_input_chars_per_word": 100,
"vocab": {
"[PAD]": 0,
"[UNK]": 1,
"[CLS]": 2,
"[SEP]": 3,
"[MASK]": 4,
"http": 5,
"https": 6,
"www": 7,
"/": 8,
".": 9,
":": 10,
"&": 11,
"?": 12,
"-": 13,
"_": 14,
"%": 15,
"##0": 16,
"##1": 17,
"##2": 18,
"##3": 19,
"##4": 20,
"##5": 21,
"##6": 22,
"##7": 23,
"##8": 24,
"##9": 25,
"0": 26,
"1": 27,
"2": 28,
"3": 29,
"4": 30,
"5": 31,
"6": 32,
"7": 33,
"8": 34,
"9": 35,
"z": 36,
"y": 37,
"x": 38,
"w": 39,
"v": 40,
"u": 41,
"t": 42,
"s": 43,
"r": 44,
"q": 45,
"p": 46,
"o": 47,
"n": 48,
"m": 49,
"l": 50,
"k": 51,
"j": 52,
"i": 53,
"h": 54,
"g": 55,
"f": 56,
"e": 57,
"d": 58,
"c": 59,
"b": 60,
"a": 61,
"##z": 62,
"##y": 63,
"##x": 64,
"##w": 65,
"##v": 66,
"##u": 67,
"##t": 68,
"##s": 69,
"##r": 70,
"##q": 71,
"##p": 72,
"##o": 73,
"##n": 74,
"##m": 75,
"##l": 76,
"##k": 77,
"##j": 78,
"##i": 79,
"##h": 80,
"##g": 81,
"##f": 82,
"##e": 83,
"##d": 84,
"##c": 85,
"##b": 86,
"##a": 87,
"##ing": 88,
"##ly": 89,
"##er": 90,
"##in": 91,
"##tion": 92,
"##re": 93,
"##un": 94,
"##ed": 95,
"##al": 96,
"##ter": 97,
"##de": 98,
"##con": 99,
"##an": 100,
"##ti": 101,
"##ic": 102,
"##cal": 103,
"##to": 104,
"##ty": 105,
"##ness": 106,
"##ta": 107,
"##di": 108,
"##la": 109,
"##en": 110,
"##es": 111,
"##ma": 112,
"##per": 113,
"##man": 114,
"##ri": 115,
"##na": 116,
"##ca": 117,
"##ex": 118,
"##dis": 119,
"##ra": 120,
"##ers": 121,
"##non": 122,
"##tions": 123,
"##com": 124,
"##ni": 125,
"##co": 126,
"##pro": 127,
"##tive": 128,
"##mi": 129,
"##pre": 130,
"##der": 131,
"##sub": 132,
"##able": 133,
"##tor": 134,
"##li": 135,
"##si": 136,
"##hy": 137,
"##mo": 138,
"##men": 139,
"##ar": 140,
"##im": 141,
"##ton": 142,
"##sis": 143,
"##tic": 144,
"##da": 145,
"##at": 146,
"##ci": 147,
"##or": 148,
"##lar": 149,
"##car": 150,
"##ment": 151,
"##lo": 152,
"##ac": 153,
"##cy": 154,
"##tu": 155,
"##less": 156,
"##as": 157,
"##um": 158,
"##pa": 159,
"##tal": 160,
"##ry": 161,
"##ro": 162,
"##fi": 163,
"##over": 164,
"##po": 165,
"##is": 166,
"##son": 167,
"##so": 168,
"##do": 169,
"##cu": 170,
"##bi": 171,
"##be": 172,
"##tri": 173,
"##ful": 174,
"##vi": 175,
"##mis": 176,
"##su": 177,
"##va": 178,
"##ous": 179,
"ftp": 180,
"tel": 181,
"file": 182,
"ws": 183,
"wss": 184,
"ssh": 185,
"ldaps": 186,
"gopher": 187,
"view": 188,
"source": 189,
"about": 190,
"chrome": 191,
"data": 192,
"irc": 193,
"magnet": 194,
"mms": 195,
"redis": 196,
"svn": 197,
"vnc": 198,
"dns": 199,
"ntp": 200,
"ip": 201,
"com": 202,
"de": 203,
"net": 204,
"uk": 205,
"cn": 206,
"org": 207,
"info": 208,
"nl": 209,
"eu": 210,
"ru": 211,
"su": 212,
"br": 213,
"htm": 214,
"php": 215,
"co": 216,
"ly": 217,
"bit": 218,
"log": 219,
"index": 220,
"bank": 221,
"za": 222,
"direct": 223,
"mail": 224,
"it": 225,
"run": 226,
"security": 227,
"code": 228,
"promo": 229,
"jpg": 230,
"img": 231,
"pay": 232,
"form": 233,
"docs": 234,
"host": 235,
"ec": 236,
"cx": 237,
"free": 238,
"true": 239,
"amp": 240,
"blog": 241,
"key": 242,
"pal": 243,
"contact": 244,
"online": 245,
"abc": 246,
"media": 247,
"admin": 248,
"etc": 249,
"login": 250,
"cmd": 251,
"bin": 252,
"web": 253,
"verif": 254,
"the": 255,
"in": 256,
"of": 257,
"la": 258,
"en": 259,
"and": 260,
"to": 261,
"der": 262,
"un": 263,
"di": 264,
"que": 265,
"is": 266,
"el": 267,
"se": 268,
"del": 269,
"die": 270,
"und": 271,
"et": 272,
"na": 273,
"was": 274,
"on": 275,
"des": 276,
"den": 277,
"le": 278,
"for": 279,
"da": 280,
"je": 281,
"van": 282,
"as": 283,
"sa": 284,
"do": 285,
"an": 286,
"les": 287,
"una": 288,
"il": 289,
"by": 290,
"og": 291,
"at": 292,
"er": 293,
"al": 294,
"von": 295,
"du": 296,
"av": 297,
"med": 298,
"con": 299,
"est": 300,
"per": 301,
"som": 302,
"los": 303,
"por": 304,
"from": 305,
"that": 306,
"no": 307,
"11": 308,
"es": 309,
"ja": 310,
"km": 311,
"om": 312,
"im": 313,
"dan": 314,
"para": 315,
"mit": 316,
"El": 317,
"his": 318,
"ha": 319,
"une": 320,
"das": 321,
"par": 322,
"au": 323,
"dans": 324,
"he": 325,
"che": 326,
"em": 327,
"dem": 328,
"til": 329,
"се": 330,
"han": 331,
"las": 332,
"della": 333,
"new": 334,
"um": 335,
"si": 336,
"var": 337,
"are": 338,
"op": 339,
"zu": 340,
"were": 341,
"od": 342,
"son": 343,
"which": 344,
"va": 345,
"pour": 346,
"ve": 347,
"sur": 348,
"war": 349,
"be": 350,
"det": 351,
"gov": 352,
"qui": 353,
"az": 354,
"te": 355,
"had": 356,
"also": 357,
"so": 358,
"am": 359,
"has": 360,
"dos": 361,
"ur": 362,
"entre": 363,
"lo": 364,
"era": 365,
"ni": 366,
"first": 367,
"os": 368,
"met": 369,
"ou": 370,
"all": 371,
"aus": 372,
"non": 373,
"film": 374,
"po": 375,
"into": 376,
"till": 377,
"ble": 378,
"ka": 379,
"mai": 380,
"up": 381,
"ng": 382,
"aux": 383,
"ad": 384,
"ki": 385,
"me": 386,
"ze": 387,
"can": 388,
"out": 389,
"wie": 390,
"со": 391,
"fu": 392,
"vom": 393,
"nu": 394,
"club": 395,
"team": 396,
"ca": 397,
"pe": 398,
"ke": 399
}
}
}