vumichien committed on
Commit
988375c
·
1 Parent(s): 973c318

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -18,14 +18,19 @@ def process_audio_file(file):
18
  return inputs
19
 
20
 
21
- def transcribe(file, state=""):
22
- inputs = process_audio_file(file)
 
 
 
 
 
 
23
  with torch.no_grad():
24
  output_logit = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
25
  pred_ids = torch.argmax(output_logit, dim=-1)
26
  text = processor.batch_decode(pred_ids)[0]
27
- state += text + " "
28
- return state, state
29
 
30
 
31
  description = "A simple interface to transcribe from spoken Japanese to Hiragana."
@@ -34,7 +39,7 @@ article = "Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a
34
  inputs = [gr.Audio(source="microphone", type="filepath", optional=True),
35
  gr.Audio(source="upload", type="filepath", optional=True),
36
  ]
37
- outputs = ["textbox", "state"]
38
 
39
  examples = [["samples/BASIC5000_0001.wav",""],
40
  ["samples/BASIC5000_0005.wav",""]
 
18
  return inputs
19
 
20
 
21
+ def transcribe(micro, file):
22
+ if file is not None and micro is None:
23
+ input_audio = file
24
+ elif file is None and micro is not None:
25
+ input_audio = micro
26
+ else:
27
+ input_audio = file
28
+ inputs = process_audio_file(input_audio )
29
  with torch.no_grad():
30
  output_logit = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
31
  pred_ids = torch.argmax(output_logit, dim=-1)
32
  text = processor.batch_decode(pred_ids)[0]
33
+ return text
 
34
 
35
 
36
  description = "A simple interface to transcribe from spoken Japanese to Hiragana."
 
39
  inputs = [gr.Audio(source="microphone", type="filepath", optional=True),
40
  gr.Audio(source="upload", type="filepath", optional=True),
41
  ]
42
+ outputs = ["textbox"]
43
 
44
  examples = [["samples/BASIC5000_0001.wav",""],
45
  ["samples/BASIC5000_0005.wav",""]