csukuangfj committed on
Commit
9194752
·
1 Parent(s): 0e6190d

small fixes

Browse files
Files changed (3) hide show
  1. app.py +18 -11
  2. examples.py +47 -27
  3. test_wavs/tal_csasr/0.wav +0 -0
app.py CHANGED
@@ -60,7 +60,11 @@ def process_uploaded_file(
60
  in_filename: str,
61
  ):
62
  if in_filename is None or in_filename == "":
63
- return ""
 
 
 
 
64
 
65
  logging.info(f"Processing uploaded file: {in_filename}")
66
  try:
@@ -73,7 +77,7 @@ def process_uploaded_file(
73
  )
74
  except Exception as e:
75
  logging.info(str(e))
76
- return str(e)
77
 
78
 
79
  def process_microphone(
@@ -84,10 +88,11 @@ def process_microphone(
84
  in_filename: str,
85
  ):
86
  if in_filename is None or in_filename == "":
87
- return (
88
  "Please first click 'Record from microphone', speak, "
89
  "click 'Stop recording', and then "
90
- "click the button 'submit for recognition'"
 
91
  )
92
 
93
  logging.info(f"Processing microphone: {in_filename}")
@@ -101,7 +106,7 @@ def process_microphone(
101
  )
102
  except Exception as e:
103
  logging.info(str(e))
104
- return str(e)
105
 
106
 
107
  @torch.no_grad()
@@ -136,7 +141,7 @@ def process(
136
  s.accept_wave_file(filename)
137
  recognizer.decode_stream(s)
138
 
139
- text = s.result.text.strip()
140
 
141
  date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
142
  end = time.time()
@@ -161,7 +166,7 @@ def process(
161
  logging.info(info)
162
  logging.info(f"\nrepo_id: {repo_id}\nhyp: {text}")
163
 
164
- return text
165
 
166
 
167
  title = "# Automatic Speech Recognition with Next-gen Kaldi"
@@ -246,6 +251,7 @@ with demo:
246
  )
247
  upload_button = gr.Button("Submit for recognition")
248
  uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")
 
249
 
250
  gr.Examples(
251
  examples=examples,
@@ -256,7 +262,7 @@ with demo:
256
  num_active_paths_slider,
257
  uploaded_file,
258
  ],
259
- outputs=[uploaded_output],
260
  fn=process_uploaded_file,
261
  )
262
 
@@ -270,6 +276,7 @@ with demo:
270
 
271
  record_button = gr.Button("Submit for recognition")
272
  recorded_output = gr.Textbox(label="Recognized speech from recordings")
 
273
 
274
  gr.Examples(
275
  examples=examples,
@@ -280,7 +287,7 @@ with demo:
280
  num_active_paths_slider,
281
  microphone,
282
  ],
283
- outputs=[recorded_output],
284
  fn=process_microphone,
285
  )
286
 
@@ -293,7 +300,7 @@ with demo:
293
  num_active_paths_slider,
294
  uploaded_file,
295
  ],
296
- outputs=[uploaded_output],
297
  )
298
 
299
  record_button.click(
@@ -305,7 +312,7 @@ with demo:
305
  num_active_paths_slider,
306
  microphone,
307
  ],
308
- outputs=[recorded_output],
309
  )
310
  gr.Markdown(description)
311
 
 
60
  in_filename: str,
61
  ):
62
  if in_filename is None or in_filename == "":
63
+ return "", build_html_output(
64
+ "Please first upload a file and then click "
65
+ 'the button "submit for recognition"',
66
+ "result_item_error",
67
+ )
68
 
69
  logging.info(f"Processing uploaded file: {in_filename}")
70
  try:
 
77
  )
78
  except Exception as e:
79
  logging.info(str(e))
80
+ return "", build_html_output(str(e), "result_item_error")
81
 
82
 
83
  def process_microphone(
 
88
  in_filename: str,
89
  ):
90
  if in_filename is None or in_filename == "":
91
+ return "", build_html_output(
92
  "Please first click 'Record from microphone', speak, "
93
  "click 'Stop recording', and then "
94
+ "click the button 'submit for recognition'",
95
+ "result_item_error",
96
  )
97
 
98
  logging.info(f"Processing microphone: {in_filename}")
 
106
  )
107
  except Exception as e:
108
  logging.info(str(e))
109
+ return "", build_html_output(str(e), "result_item_error")
110
 
111
 
112
  @torch.no_grad()
 
141
  s.accept_wave_file(filename)
142
  recognizer.decode_stream(s)
143
 
144
+ text = s.result.text
145
 
146
  date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
147
  end = time.time()
 
166
  logging.info(info)
167
  logging.info(f"\nrepo_id: {repo_id}\nhyp: {text}")
168
 
169
+ return text, build_html_output(info)
170
 
171
 
172
  title = "# Automatic Speech Recognition with Next-gen Kaldi"
 
251
  )
252
  upload_button = gr.Button("Submit for recognition")
253
  uploaded_output = gr.Textbox(label="Recognized speech from uploaded file")
254
+ uploaded_html_info = gr.HTML(label="Info")
255
 
256
  gr.Examples(
257
  examples=examples,
 
262
  num_active_paths_slider,
263
  uploaded_file,
264
  ],
265
+ outputs=[uploaded_output, uploaded_html_info],
266
  fn=process_uploaded_file,
267
  )
268
 
 
276
 
277
  record_button = gr.Button("Submit for recognition")
278
  recorded_output = gr.Textbox(label="Recognized speech from recordings")
279
+ recorded_html_info = gr.HTML(label="Info")
280
 
281
  gr.Examples(
282
  examples=examples,
 
287
  num_active_paths_slider,
288
  microphone,
289
  ],
290
+ outputs=[recorded_output, recorded_html_info],
291
  fn=process_microphone,
292
  )
293
 
 
300
  num_active_paths_slider,
301
  uploaded_file,
302
  ],
303
+ outputs=[uploaded_output, uploaded_html_info],
304
  )
305
 
306
  record_button.click(
 
312
  num_active_paths_slider,
313
  microphone,
314
  ],
315
+ outputs=[recorded_output, recorded_html_info],
316
  )
317
  gr.Markdown(description)
318
 
examples.py CHANGED
@@ -16,6 +16,48 @@
16
  # See the License for the specific language governing permissions and
17
  # limitations under the License.
18
  examples = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  # librispeech
20
  # https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13/tree/main/test_wavs
21
  [
@@ -154,57 +196,42 @@ examples = [
154
  "./test_wavs/aidatatang_200zh/T0055G0036S0004.wav",
155
  ],
156
  # tal_csasr
157
- # https://huggingface.co/luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5/tree/main/test_wavs
158
  [
159
  "Chinese+English",
160
- "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
161
  "greedy_search",
162
  4,
163
  "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_132.wav",
164
  ],
165
  [
166
  "Chinese+English",
167
- "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
168
  "greedy_search",
169
  4,
170
  "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_138.wav",
171
  ],
172
  [
173
  "Chinese+English",
174
- "luomingshuang/icefall_asr_tal-csasr_pruned_transducer_stateless5",
175
  "greedy_search",
176
  4,
177
  "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_145.wav",
178
  ],
179
  [
180
  "Tibetan",
181
- "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29",
182
  "greedy_search",
183
  4,
184
  "./test_wavs/tibetan/a_0_cacm-A70_31116.wav",
185
  ],
186
  [
187
  "Tibetan",
188
- "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29",
189
- "greedy_search",
190
- 4,
191
- "./test_wavs/tibetan/a_0_cacm-A70_31117.wav",
192
- ],
193
- [
194
- "Tibetan",
195
- "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless5-2022-11-29",
196
  "greedy_search",
197
  4,
198
  "./test_wavs/tibetan/a_0_cacm-A70_31118.wav",
199
  ],
200
  # arabic
201
- [
202
- "Arabic",
203
- "AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
204
- "greedy_search",
205
- 4,
206
- "./test_wavs/arabic/a.wav",
207
- ],
208
  [
209
  "Arabic",
210
  "AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
@@ -226,11 +253,4 @@ examples = [
226
  4,
227
  "./test_wavs/german/20120315-0900-PLENARY-14-de_20120315.wav",
228
  ],
229
- [
230
- "German",
231
- "csukuangfj/wav2vec2.0-torchaudio",
232
- "greedy_search",
233
- 4,
234
- "./test_wavs/german/20170517-0900-PLENARY-16-de_20170517.wav",
235
- ],
236
  ]
 
16
  # See the License for the specific language governing permissions and
17
  # limitations under the License.
18
  examples = [
19
+ [
20
+ "Chinese+English",
21
+ "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
22
+ "greedy_search",
23
+ 4,
24
+ "./test_wavs/tal_csasr/0.wav",
25
+ ],
26
+ [
27
+ "English",
28
+ "csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13",
29
+ "greedy_search",
30
+ 4,
31
+ "./test_wavs/librispeech/1089-134686-0001.wav",
32
+ ],
33
+ [
34
+ "Chinese",
35
+ "luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless2",
36
+ "greedy_search",
37
+ 4,
38
+ "./test_wavs/wenetspeech/DEV_T0000000000.opus",
39
+ ],
40
+ [
41
+ "German",
42
+ "csukuangfj/wav2vec2.0-torchaudio",
43
+ "greedy_search",
44
+ 4,
45
+ "./test_wavs/german/20170517-0900-PLENARY-16-de_20170517.wav",
46
+ ],
47
+ [
48
+ "Arabic",
49
+ "AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
50
+ "greedy_search",
51
+ 4,
52
+ "./test_wavs/arabic/a.wav",
53
+ ],
54
+ [
55
+ "Tibetan",
56
+ "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
57
+ "greedy_search",
58
+ 4,
59
+ "./test_wavs/tibetan/a_0_cacm-A70_31117.wav",
60
+ ],
61
  # librispeech
62
  # https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13/tree/main/test_wavs
63
  [
 
196
  "./test_wavs/aidatatang_200zh/T0055G0036S0004.wav",
197
  ],
198
  # tal_csasr
 
199
  [
200
  "Chinese+English",
201
+ "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
202
  "greedy_search",
203
  4,
204
  "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_132.wav",
205
  ],
206
  [
207
  "Chinese+English",
208
+ "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
209
  "greedy_search",
210
  4,
211
  "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_138.wav",
212
  ],
213
  [
214
  "Chinese+English",
215
+ "ptrnull/icefall-asr-conv-emformer-transducer-stateless2-zh",
216
  "greedy_search",
217
  4,
218
  "./test_wavs/tal_csasr/210_36476_210_8341_1_1533271973_7057520_145.wav",
219
  ],
220
  [
221
  "Tibetan",
222
+ "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
223
  "greedy_search",
224
  4,
225
  "./test_wavs/tibetan/a_0_cacm-A70_31116.wav",
226
  ],
227
  [
228
  "Tibetan",
229
+ "syzym/icefall-asr-xbmu-amdo31-pruned-transducer-stateless7-2022-12-02",
 
 
 
 
 
 
 
230
  "greedy_search",
231
  4,
232
  "./test_wavs/tibetan/a_0_cacm-A70_31118.wav",
233
  ],
234
  # arabic
 
 
 
 
 
 
 
235
  [
236
  "Arabic",
237
  "AmirHussein/icefall-asr-mgb2-conformer_ctc-2022-27-06",
 
253
  4,
254
  "./test_wavs/german/20120315-0900-PLENARY-14-de_20120315.wav",
255
  ],
 
 
 
 
 
 
 
256
  ]
test_wavs/tal_csasr/0.wav ADDED
Binary file (778 kB). View file