EngNada committed on
Commit
88dc1f5
·
1 Parent(s): cdfa401

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +1 -1
vocab.json CHANGED
@@ -1 +1 @@
1
- {"ل": 0, "ف": 1, "آ": 2, "ض": 3, "ُ": 4, "ۖ": 5, "": 6, "ئ": 7, "ر": 8, "ش": 9, "ۚ": 10, "م": 11, "ء": 12, "«": 13, "د": 14, "ً": 15, "خ": 17, "ن": 18, "ت": 19, "ق": 20, "ط": 21, "َ": 22, "،": 23, "س": 24, "ک": 25, "»": 26, "ٰ": 27, "ث": 28, "": 29, "ع": 30, "ه": 31, "چ": 32, "ة": 33, "ى": 34, "و": 35, "ِ": 36, "ج": 37, "t": 38, "e": 39, "ب": 40, "ظ": 41, "ٌ": 42, "ْ": 43, "غ": 44, "ؤ": 45, "": 46, "ك": 47, "_": 48, "؛": 49, "g": 50, "ي": 51, "ص": 52, "ڨ": 53, "ز": 54, "": 55, "ذ": 56, "؟": 57, "أ": 58, "ھ": 59, "ـ": 60, "ٍ": 61, "إ": 62, "ّ": 63, "ح": 64, "ی": 65, "ا": 66, "|": 16, "[UNK]": 67, "[PAD]": 68}
 
1
+ {"ک": 0, "ب": 1, "ٌ": 2, "": 3, "َ": 4, "ج": 5, "ی": 6, "ز": 7, "ي": 8, "ظ": 9, "ّ": 10, "چ": 11, "ط": 12, "غ": 13, "t": 14, "ُ": 15, "": 16, "ك": 17, "«": 18, "ٰ": 19, "»": 20, "خ": 21, "ه": 22, "ث": 23, "ة": 24, "آ": 25, "ض": 26, "ش": 27, "_": 28, "؛": 29, "g": 30, "ۚ": 31, "ِ": 32, "ا": 33, "ً": 34, "": 35, "ء": 36, "ذ": 37, "د": 38, "": 39, "م": 40, "؟": 41, "ع": 43, "ـ": 44, "ٍ": 45, "ۖ": 46, "ل": 47, "إ": 48, "ى": 49, "ئ": 50, "e": 51, "ر": 52, "ص": 53, "ڨ": 54, "ھ": 55, "أ": 56, "ن": 57, "ق": 58, "ت": 59, "،": 60, "ح": 61, "ف": 62, "و": 63, "س": 64, "ؤ": 65, "ْ": 66, "|": 42, "[UNK]": 67, "[PAD]": 68}