csukuangfj committed on
Commit
08d2e6b
·
1 Parent(s): 19c8525
README.md CHANGED
@@ -1,8 +1,8 @@
1
  ---
2
- title: Test
3
- emoji: 📚
4
- colorFrom: green
5
- colorTo: gray
6
  sdk: gradio
7
  sdk_version: 3.0.26
8
  app_file: app.py
 
1
  ---
2
+ title: Automatic Speech Recognition
3
+ emoji: 🌖
4
+ colorFrom: yellow
5
+ colorTo: green
6
  sdk: gradio
7
  sdk_version: 3.0.26
8
  app_file: app.py
examples.py CHANGED
@@ -176,4 +176,25 @@ examples = [
176
  4,
177
  "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_145.wav",
178
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
  ]
 
176
  4,
177
  "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_145.wav",
178
  ],
179
+ [
180
+ "Tibetan",
181
+ "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29",
182
+ "greedy_search",
183
+ 4,
184
+ "./test_wavs/tibetan/a_0_cacm-A70_31116.wav",
185
+ ],
186
+ [
187
+ "Tibetan",
188
+ "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29",
189
+ "greedy_search",
190
+ 4,
191
+ "./test_wavs/tibetan/a_0_cacm-A70_31117.wav",
192
+ ],
193
+ [
194
+ "Tibetan",
195
+ "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29",
196
+ "greedy_search",
197
+ 4,
198
+ "./test_wavs/tibetan/a_0_cacm-A70_31118.wav",
199
+ ],
200
  ]
model.py CHANGED
@@ -31,6 +31,8 @@ def get_pretrained_model(repo_id: str) -> OfflineAsr:
31
  return english_models[repo_id](repo_id)
32
  elif repo_id in chinese_english_mixed_models:
33
  return chinese_english_mixed_models[repo_id](repo_id)
 
 
34
  else:
35
  raise ValueError(f"Unsupported repo_id: {repo_id}")
36
 
@@ -122,12 +124,28 @@ def _get_gigaspeech_pre_trained_model(repo_id: str) -> OfflineAsr:
122
  @lru_cache(maxsize=10)
123
  def _get_librispeech_pre_trained_model(repo_id: str) -> OfflineAsr:
124
  assert repo_id in [
 
125
  "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13", # noqa
 
 
126
  ], repo_id
127
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  nn_model_filename = _get_nn_model_filename(
129
  repo_id=repo_id,
130
- filename="cpu_jit.pt",
131
  )
132
  bpe_model_filename = _get_bpe_model_filename(repo_id=repo_id)
133
 
@@ -224,16 +242,47 @@ def _get_aidatatang_200zh_pretrained_mode(repo_id: str):
224
  )
225
 
226
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  chinese_models = {
228
  "luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless2": _get_wenetspeech_pre_trained_model, # noqa
229
  "yuekai/icefall-asr-aishell2-pruned-transducer-stateless5-A-2022-07-12": _get_aishell2_pretrained_model, # noqa
230
  "yuekai/icefall-asr-aishell2-pruned-transducer-stateless5-B-2022-07-12": _get_aishell2_pretrained_model, # noqa
231
- "luomingshuang/icefall_asr_alimeeting_pruned_transducer_stateless2": _get_alimeeting_pre_trained_model, # noqa
232
  "luomingshuang/icefall_asr_aidatatang-200zh_pruned_transducer_stateless2": _get_aidatatang_200zh_pretrained_mode, # noqa
 
233
  }
234
 
235
  english_models = {
236
  "wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2": _get_gigaspeech_pre_trained_model, # noqa
 
 
 
237
  "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13": _get_librispeech_pre_trained_model, # noqa
238
  }
239
 
@@ -241,14 +290,21 @@ chinese_english_mixed_models = {
241
  "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5": _get_tal_csasr_pre_trained_model, # noqa
242
  }
243
 
 
 
 
 
 
244
  all_models = {
245
  **chinese_models,
246
  **english_models,
247
  **chinese_english_mixed_models,
 
248
  }
249
 
250
  language_to_models = {
251
  "Chinese": list(chinese_models.keys()),
252
  "English": list(english_models.keys()),
253
  "Chinese+English": list(chinese_english_mixed_models.keys()),
 
254
  }
 
31
  return english_models[repo_id](repo_id)
32
  elif repo_id in chinese_english_mixed_models:
33
  return chinese_english_mixed_models[repo_id](repo_id)
34
+ elif repo_id in tibetan_models:
35
+ return tibetan_models[repo_id](repo_id)
36
  else:
37
  raise ValueError(f"Unsupported repo_id: {repo_id}")
38
 
 
124
  @lru_cache(maxsize=10)
125
  def _get_librispeech_pre_trained_model(repo_id: str) -> OfflineAsr:
126
  assert repo_id in [
127
+ "WeijiZhuang/icefall-asr-librispeech-pruned-transducer-stateless8-2022-12-02", # noqa
128
  "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13", # noqa
129
+ "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11", # noqa
130
+ "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14", # noqa
131
  ], repo_id
132
 
133
+ filename = "cpu_jit.pt"
134
+ if (
135
+ repo_id
136
+ == "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11"
137
+ ):
138
+ filename = "cpu_jit-torch-1.10.0.pt"
139
+
140
+ if (
141
+ repo_id
142
+ == "WeijiZhuang/icefall-asr-librispeech-pruned-transducer-stateless8-2022-12-02"
143
+ ):
144
+ filename = "cpu_jit-torch-1.10.pt"
145
+
146
  nn_model_filename = _get_nn_model_filename(
147
  repo_id=repo_id,
148
+ filename=filename,
149
  )
150
  bpe_model_filename = _get_bpe_model_filename(repo_id=repo_id)
151
 
 
242
  )
243
 
244
 
245
@lru_cache(maxsize=10)
def _get_tibetan_pre_trained_model(repo_id: str):
    """Build the offline recognizer for one of the supported Tibetan models.

    Args:
      repo_id:
        Identifier of the model repository. It must be one of the two
        ``syzym/icefall-asr-xbmu-amdo31-...`` repositories listed in the
        assertion below.

    Returns:
      An ``OfflineAsr`` instance created with the resolved neural-network
      model file and BPE model file, no token file, the module-level
      ``sample_rate``, and ``device="cpu"``.

    Raises:
      AssertionError: If ``repo_id`` is not one of the supported repositories.
    """
    assert repo_id in [
        "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
        "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29",
    ], repo_id

    # Default model file name; the stateless5 repository needs a different
    # file name, selected below.
    # NOTE(review): assumes the stateless7 repository provides "cpu_jit.pt"
    # -- confirm against the repository contents.
    filename = "cpu_jit.pt"
    if (
        repo_id
        == "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29"
    ):
        filename = "cpu_jit-epoch-28-avg-23-torch-1.10.0.pt"

    # Resolve the model file for every supported repo_id. Previously this call
    # lived inside the branch above, which left ``nn_model_filename`` unbound
    # (UnboundLocalError) for the stateless7 repository.
    nn_model_filename = _get_nn_model_filename(
        repo_id=repo_id,
        filename=filename,
    )

    bpe_model_filename = _get_bpe_model_filename(repo_id=repo_id)

    return OfflineAsr(
        nn_model_filename=nn_model_filename,
        bpe_model_filename=bpe_model_filename,
        token_filename=None,
        sample_rate=sample_rate,
        device="cpu",
    )
271
+
272
+
273
  chinese_models = {
274
  "luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless2": _get_wenetspeech_pre_trained_model, # noqa
275
  "yuekai/icefall-asr-aishell2-pruned-transducer-stateless5-A-2022-07-12": _get_aishell2_pretrained_model, # noqa
276
  "yuekai/icefall-asr-aishell2-pruned-transducer-stateless5-B-2022-07-12": _get_aishell2_pretrained_model, # noqa
 
277
  "luomingshuang/icefall_asr_aidatatang-200zh_pruned_transducer_stateless2": _get_aidatatang_200zh_pretrained_mode, # noqa
278
+ "luomingshuang/icefall_asr_alimeeting_pruned_transducer_stateless2": _get_alimeeting_pre_trained_model, # noqa
279
  }
280
 
281
  english_models = {
282
  "wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2": _get_gigaspeech_pre_trained_model, # noqa
283
+ "WeijiZhuang/icefall-asr-librispeech-pruned-transducer-stateless8-2022-12-02": _get_librispeech_pre_trained_model, # noqa
284
+ "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14": _get_librispeech_pre_trained_model, # noqa
285
+ "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11": _get_librispeech_pre_trained_model, # noqa
286
  "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13": _get_librispeech_pre_trained_model, # noqa
287
  }
288
 
 
290
  "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5": _get_tal_csasr_pre_trained_model, # noqa
291
  }
292
 
293
+ tibetan_models = {
294
+ "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02": _get_tibetan_pre_trained_model, # noqa
295
+ "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29": _get_tibetan_pre_trained_model, # noqa
296
+ }
297
+
298
  all_models = {
299
  **chinese_models,
300
  **english_models,
301
  **chinese_english_mixed_models,
302
+ **tibetan_models,
303
  }
304
 
305
  language_to_models = {
306
  "Chinese": list(chinese_models.keys()),
307
  "English": list(english_models.keys()),
308
  "Chinese+English": list(chinese_english_mixed_models.keys()),
309
+ "Tibetan": list(tibetan_models.keys()),
310
  }
requirements.txt CHANGED
@@ -1,11 +1,9 @@
1
- https://download.pytorch.org/whl/cpu/torch-1.10.0%2Bcpu-cp38-cp38-linux_x86_64.whl
2
- https://k2-fsa.org/nightly/whl/k2-1.17.dev20220711+cpu.torch1.10.0-cp38-cp38-linux_x86_64.whl
3
- https://download.pytorch.org/whl/cpu/torchaudio-0.10.0%2Bcpu-cp38-cp38-linux_x86_64.whl
4
-
5
-
6
- https://huggingface.co/csukuangfj/wheels/resolve/main/kaldifeat-1.17-cp38-cp38-linux_x86_64.whl
7
- https://huggingface.co/csukuangfj/wheels/resolve/main/k2_sherpa-0.6-cp38-cp38-linux_x86_64.whl
8
 
 
 
 
9
 
10
  sentencepiece>=0.1.96
11
  numpy
 
1
+ https://download.pytorch.org/whl/cpu/torch-1.13.0%2Bcpu-cp38-cp38-linux_x86_64.whl
2
+ https://download.pytorch.org/whl/cpu/torchaudio-0.13.0%2Bcpu-cp38-cp38-linux_x86_64.whl
 
 
 
 
 
3
 
4
+ https://huggingface.co/csukuangfj/wheels/resolve/main/k2-1.23.2.dev20221204%2Bcpu.torch1.13.0-cp38-cp38-linux_x86_64.whl
5
+ https://huggingface.co/csukuangfj/wheels/resolve/main/kaldifeat-1.22-cp38-cp38-linux_x86_64.whl
6
+ https://huggingface.co/csukuangfj/wheels/resolve/main/k2_sherpa-1.1-cp38-cp38-linux_x86_64.whl
7
 
8
  sentencepiece>=0.1.96
9
  numpy
test_wavs/tibetan/a_0_cacm-A70_31116.wav ADDED
Binary file (97.4 kB). View file
 
test_wavs/tibetan/a_0_cacm-A70_31117.wav ADDED
Binary file (128 kB). View file
 
test_wavs/tibetan/a_0_cacm-A70_31118.wav ADDED
Binary file (87.1 kB). View file
 
test_wavs/tibetan/trans.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ a_0_cacm-A70_31116.wav ལོ བཅུ ཙམ མ འདང བའི དུས སྐབས ནང
2
+ a_0_cacm-A70_31117.wav དྲག པོའི ངོ ལོག ཟིང འཁྲུག སྒྲིག འཛུགས དང ངན བཀོད བྱས ཡོད
3
+ a_0_cacm-A70_31118.wav གནས བབ འདིའི རིགས གང མགྱོགས འགྱུར བ གཏོང དགོས