{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 9,
      "content": ";",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": null,
  "pre_tokenizer": {
    "type": "Split",
    "pattern": {
      "Regex": "."
    },
    "behavior": "Isolated",
    "invert": false
  },
  "post_processor": {
    "type": "TemplateProcessing",
    "single": [
      {
        "SpecialToken": {
          "id": ";",
          "type_id": 0
        }
      },
      {
        "Sequence": {
          "id": "A",
          "type_id": 0
        }
      }
    ],
    "pair": [
      {
        "Sequence": {
          "id": "A",
          "type_id": 0
        }
      },
      {
        "Sequence": {
          "id": "B",
          "type_id": 1
        }
      }
    ],
    "special_tokens": {
      " ": {
        "id": " ",
        "ids": [
          0
        ],
        "tokens": [
          " "
        ]
      },
      ";": {
        "id": ";",
        "ids": [
          9
        ],
        "tokens": [
          ";"
        ]
      }
    }
  },
  "decoder": {
    "type": "Sequence",
    "decoders": [
      {
        "type": "Replace",
        "pattern": {
          "String": " "
        },
        "content": "▁"
      },
      {
        "type": "Replace",
        "pattern": {
          "String": "▁"
        },
        "content": " "
      }
    ]
  },
  "model": {
    "type": "WordLevel",
    "vocab": {
      " ": 0,
      "1": 1,
      "2": 2,
      "3": 3,
      "4": 4,
      "5": 5,
      "6": 6,
      "7": 7,
      "8": 8,
      ";": 9,
      "#": 10,
      "a": 11,
      "b": 12,
      "c": 13,
      "d": 14,
      "e": 15,
      "f": 16,
      "g": 17,
      "h": 18,
      "n": 19,
      "r": 20,
      "q": 21,
      "k": 22
    },
    "unk_token": " "
  }
}