Tonic committed on
Commit
78e56be
·
1 Parent(s): 3cb13e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -14
app.py CHANGED
@@ -36,7 +36,7 @@ def save_audio(audio_input, output_dir="saved_audio"):
36
  def speech_to_text(audio_data, tgt_lang):
37
  file_path = save_audio(audio_data)
38
  audio_input, _ = torchaudio.load(file_path)
39
- s2t_model = torch.jit.load("unity_on_device_s2t.ptl")
40
  with torch.no_grad():
41
  text = s2t_model(audio_input, tgt_lang=languages[tgt_lang])
42
 
@@ -48,21 +48,15 @@ def speech_to_text(audio_data, tgt_lang):
48
  def speech_to_speech_translation(audio_data, tgt_lang):
49
  file_path = save_audio(audio_data)
50
  audio_input, _ = torchaudio.load(file_path)
51
- s2st_model = torch.jit.load("unity_on_device_s2t.ptl")
 
52
  with torch.no_grad():
53
- model_output = s2st_model(audio_input, tgt_lang=languages[tgt_lang])
54
 
55
- # Print the model's output for debugging
56
- print("Speech to Speech Translation Model Output:", model_output)
57
-
58
- # Check the structure of model_output and unpack accordingly
59
- if len(model_output) == 3:
60
- text, units, waveform = model_output
61
- elif len(model_output) == 2:
62
- text, waveform = model_output
63
- units = None # or some default value
64
- else:
65
- raise ValueError("Unexpected model output format")
66
 
67
  output_file = "/tmp/result.wav"
68
  torchaudio.save(output_file, waveform.unsqueeze(0), sample_rate=16000)
 
36
  def speech_to_text(audio_data, tgt_lang):
37
  file_path = save_audio(audio_data)
38
  audio_input, _ = torchaudio.load(file_path)
39
+ s2t_model = torch.jit.load("unity_on_device.ptl")
40
  with torch.no_grad():
41
  text = s2t_model(audio_input, tgt_lang=languages[tgt_lang])
42
 
 
48
  def speech_to_speech_translation(audio_data, tgt_lang):
49
  file_path = save_audio(audio_data)
50
  audio_input, _ = torchaudio.load(file_path)
51
+ s2st_model = torch.jit.load("unity_on_device.ptl")
52
+
53
  with torch.no_grad():
54
+ text, units, waveform = s2st_model(audio_input, tgt_lang=languages[tgt_lang])
55
 
56
+ # Print the model's output for debugging (optional)
57
+ print("Translated Text:", text)
58
+ print("Units:", units)
59
+ print("Waveform Shape:", waveform.shape)
 
 
 
 
 
 
 
60
 
61
  output_file = "/tmp/result.wav"
62
  torchaudio.save(output_file, waveform.unsqueeze(0), sample_rate=16000)