lIlBrother
committed on
Commit
·
9876cd0
1
Parent(s):
1fad393
Init: Model config
Browse files
- alphabet.json +1 -0
- language_model/attrs.json +1 -0
- preprocessor_config.json +10 -0
- special_tokens_map.json +6 -0
- tokenizer_config.json +13 -0
- vocab.json +74 -0
alphabet.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"labels": ["", "\u2047", "<s>", "</s>", " ", "\u1171", "\u11b4", "\u1165", "\u11ae", "\u110c", "\u116a", "\u110e", "\u11b3", "\u11bf", "\u116b", "\u11c1", "\u1163", "\u11aa", "\u110d", "\u1173", "\u11ba", "\u1169", "\u1174", "\u1112", "\u11c2", "\u11ab", "\u11b5", "\u1167", "\u11b6", "\u1168", "\u1161", "\u11ad", "\u1170", "\u11bd", "\u11b8", "\u11b1", "\u1109", "\u11bb", "\u11af", "\u116d", "\u1103", "\u11a9", "\u1175", "\u1101", "\u1111", "\u1162", "\u1110", "\u1164", "\u1108", "\u116e", "\u1104", "\u1102", "\u116f", "\u110a", "\u1105", "\u11b7", "\u1106", "\u11b9", "\u116c", "\u1100", "\u11ac", "\u1107", "\u1166", "\u11b0", "\u11bc", "\u11b2", "\u11be", "\u110b", "\u11c0", "\u11a8", "\u110f", "\u1172"], "is_bpe": false}
|
language_model/attrs.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": false}
|
preprocessor_config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"do_normalize": true,
|
3 |
+
"feature_extractor_type": "Wav2Vec2FeatureExtractor",
|
4 |
+
"feature_size": 1,
|
5 |
+
"padding_side": "right",
|
6 |
+
"padding_value": 0.0,
|
7 |
+
"processor_class": "Wav2Vec2ProcessorWithLM",
|
8 |
+
"return_attention_mask": true,
|
9 |
+
"sampling_rate": 16000
|
10 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<s>",
|
3 |
+
"eos_token": "</s>",
|
4 |
+
"pad_token": "<pad>",
|
5 |
+
"unk_token": "<unk>"
|
6 |
+
}
|
tokenizer_config.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<s>",
|
3 |
+
"do_lower_case": false,
|
4 |
+
"eos_token": "</s>",
|
5 |
+
"name_or_path": "/data_raid0/TADEV_BIG_DATA/ASR/STT/model/fine-tuning/42maru/wav2vec2-base-4data",
|
6 |
+
"pad_token": "<pad>",
|
7 |
+
"processor_class": "Wav2Vec2ProcessorWithLM",
|
8 |
+
"replace_word_delimiter_char": " ",
|
9 |
+
"special_tokens_map_file": "/DATA01/bart/workspace/stt/output_dir/special_tokens_map.json",
|
10 |
+
"tokenizer_class": "Wav2Vec2CTCTokenizer",
|
11 |
+
"unk_token": "<unk>",
|
12 |
+
"word_delimiter_token": "|"
|
13 |
+
}
|
vocab.json
ADDED
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"<pad>": 0,
|
3 |
+
"<unk>": 1,
|
4 |
+
"<s>": 2,
|
5 |
+
"</s>": 3,
|
6 |
+
"|": 4,
|
7 |
+
"\u1171": 5,
|
8 |
+
"\u11b4": 6,
|
9 |
+
"\u1165": 7,
|
10 |
+
"\u11ae": 8,
|
11 |
+
"\u110c": 9,
|
12 |
+
"\u116a": 10,
|
13 |
+
"\u110e": 11,
|
14 |
+
"\u11b3": 12,
|
15 |
+
"\u11bf": 13,
|
16 |
+
"\u116b": 14,
|
17 |
+
"\u11c1": 15,
|
18 |
+
"\u1163": 16,
|
19 |
+
"\u11aa": 17,
|
20 |
+
"\u110d": 18,
|
21 |
+
"\u1173": 19,
|
22 |
+
"\u11ba": 20,
|
23 |
+
"\u1169": 21,
|
24 |
+
"\u1174": 22,
|
25 |
+
"\u1112": 23,
|
26 |
+
"\u11c2": 24,
|
27 |
+
"\u11ab": 25,
|
28 |
+
"\u11b5": 26,
|
29 |
+
"\u1167": 27,
|
30 |
+
"\u11b6": 28,
|
31 |
+
"\u1168": 29,
|
32 |
+
"\u1161": 30,
|
33 |
+
"\u11ad": 31,
|
34 |
+
"\u1170": 32,
|
35 |
+
"\u11bd": 33,
|
36 |
+
"\u11b8": 34,
|
37 |
+
"\u11b1": 35,
|
38 |
+
"\u1109": 36,
|
39 |
+
"\u11bb": 37,
|
40 |
+
"\u11af": 38,
|
41 |
+
"\u116d": 39,
|
42 |
+
"\u1103": 40,
|
43 |
+
"\u11a9": 41,
|
44 |
+
"\u1175": 42,
|
45 |
+
"\u1101": 43,
|
46 |
+
"\u1111": 44,
|
47 |
+
"\u1162": 45,
|
48 |
+
"\u1110": 46,
|
49 |
+
"\u1164": 47,
|
50 |
+
"\u1108": 48,
|
51 |
+
"\u116e": 49,
|
52 |
+
"\u1104": 50,
|
53 |
+
"\u1102": 51,
|
54 |
+
"\u116f": 52,
|
55 |
+
"\u110a": 53,
|
56 |
+
"\u1105": 54,
|
57 |
+
"\u11b7": 55,
|
58 |
+
"\u1106": 56,
|
59 |
+
"\u11b9": 57,
|
60 |
+
"\u116c": 58,
|
61 |
+
"\u1100": 59,
|
62 |
+
"\u11ac": 60,
|
63 |
+
"\u1107": 61,
|
64 |
+
"\u1166": 62,
|
65 |
+
"\u11b0": 63,
|
66 |
+
"\u11bc": 64,
|
67 |
+
"\u11b2": 65,
|
68 |
+
"\u11be": 66,
|
69 |
+
"\u110b": 67,
|
70 |
+
"\u11c0": 68,
|
71 |
+
"\u11a8": 69,
|
72 |
+
"\u110f": 70,
|
73 |
+
"\u1172": 71
|
74 |
+
}
|