|
{ |
|
"version": "1.0", |
|
"truncation": null, |
|
"padding": null, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"content": "<unk>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
} |
|
], |
|
"normalizer": { |
|
"type": "NFKC" |
|
}, |
|
"pre_tokenizer": { |
|
"type": "Metaspace", |
|
"replacement": "β", |
|
"add_prefix_space": true |
|
}, |
|
"post_processor": null, |
|
"decoder": { |
|
"type": "Metaspace", |
|
"replacement": "β", |
|
"add_prefix_space": true |
|
}, |
|
"model": { |
|
"type": "BPE", |
|
"dropout": null, |
|
"unk_token": "<unk>", |
|
"continuing_subword_prefix": null, |
|
"end_of_word_suffix": null, |
|
"fuse_unk": false, |
|
"byte_fallback": false, |
|
"vocab": { |
|
"<unk>": 0, |
|
"'": 1, |
|
"(": 2, |
|
")": 3, |
|
"*": 4, |
|
"+": 5, |
|
".": 6, |
|
"1": 7, |
|
"6": 8, |
|
"@": 9, |
|
"C": 10, |
|
"D": 11, |
|
"F": 12, |
|
"S": 13, |
|
"U": 14, |
|
"Z": 15, |
|
"[": 16, |
|
"\\": 17, |
|
"^": 18, |
|
"_": 19, |
|
"`": 20, |
|
"a": 21, |
|
"k": 22, |
|
"l": 23, |
|
"p": 24, |
|
"r": 25, |
|
"~": 26, |
|
"": 27, |
|
"Β": 28, |
|
"Β": 29, |
|
"Δ": 30, |
|
"Δ": 31, |
|
"β": 32, |
|
"'.": 33, |
|
"(+": 34, |
|
"(+'.": 35, |
|
"(+'.*": 36, |
|
")(+'.*": 37, |
|
"6(+'.*": 38, |
|
"D(+'.*": 39, |
|
")(+'.*6(+'.*": 40, |
|
"D(+'.*C": 41, |
|
")(+'.*6(+'.*D(+'.*C": 42, |
|
"*)(+'.*6(+'.*D(+'.*C": 43, |
|
"β1": 44, |
|
"β@": 45, |
|
"βF": 46, |
|
"βS": 47, |
|
"βU": 48, |
|
"βZ": 49, |
|
"β[": 50, |
|
"β^": 51, |
|
"β`": 52, |
|
"βk": 53, |
|
"βp": 54, |
|
"β~": 55, |
|
"βΒ": 56, |
|
"β*)(+'.*6(+'.*D(+'.*C": 57, |
|
"(+'.*)(+'.*6(+'.*D(+'.*C": 58, |
|
"β*)(+'.*6(+'.*D(+'.*C(+'.*)(+'.*6(+'.*D(+'.*C": 59, |
|
"β*)(+'.*6(+'.*D(+'.*C(+'.*)(+'.*6(+'.*D(+'.*C(+'.": 60, |
|
"β\\": 61, |
|
"β_": 62, |
|
"βa": 63, |
|
"βl": 64, |
|
"βr": 65, |
|
"β": 66, |
|
"βΒ": 67 |
|
}, |
|
"merges": [ |
|
"' .", |
|
"( +", |
|
"(+ '.", |
|
"(+'. *", |
|
") (+'.*", |
|
"6 (+'.*", |
|
"D (+'.*", |
|
")(+'.* 6(+'.*", |
|
"D(+'.* C", |
|
")(+'.*6(+'.* D(+'.*C", |
|
"* )(+'.*6(+'.*D(+'.*C", |
|
"β 1", |
|
"β @", |
|
"β F", |
|
"β S", |
|
"β U", |
|
"β Z", |
|
"β [", |
|
"β ^", |
|
"β `", |
|
"β k", |
|
"β p", |
|
"β ~", |
|
"β Β", |
|
"β *)(+'.*6(+'.*D(+'.*C", |
|
"(+'.* )(+'.*6(+'.*D(+'.*C", |
|
"β*)(+'.*6(+'.*D(+'.*C (+'.*)(+'.*6(+'.*D(+'.*C", |
|
"β*)(+'.*6(+'.*D(+'.*C(+'.*)(+'.*6(+'.*D(+'.*C (+'.", |
|
"β \\", |
|
"β _", |
|
"β a", |
|
"β l", |
|
"β r", |
|
"β ", |
|
"β Β" |
|
] |
|
} |
|
} |