justus-tobias committed
Commit 2ace8c2 · 1 Parent(s): d8213a7
Files changed (1)
  1. app.py +26 -14
app.py CHANGED
@@ -95,27 +95,36 @@ def convert2wav(audio):
 ##########################################################################################################
 
 def process_audio(audio, instream):
+    log_out = ""
+    outwav = torch.randn(1, 1, 24000 * 10)
+    stream = torch.randn(1, 1, 24000 * 10)
+
+
     print("Audio recieved")
     if audio is None:
         return gr.update(), instream
 
-    if instream is None:
-        instream = (24000, torch.randn(1, 1, 24000 * 10).squeeze().cpu().numpy())
-    print("STREAM RECIEVED")
-    stream = (audio[0], np.concatenate((instream[1], audio[1])))
+    try:
+        if instream is None:
+            instream = (24000, torch.randn(1, 1, 24000 * 10).squeeze().cpu().numpy())
+        print("STREAM RECIEVED")
+        stream = (audio[0], np.concatenate((instream[1], audio[1])))
 
-    # Assuming instream[1] and audio[1] are valid inputs for convert2wav
-    wav1 = convert2wav(instream)
-    wav2 = convert2wav(audio)
+        # Assuming instream[1] and audio[1] are valid inputs for convert2wav
+        wav1 = convert2wav(instream)
+        wav2 = convert2wav(audio)
 
-    # Concatenate along the last dimension (time axis)
-    combined_wav = torch.cat((wav1, wav2), dim=2)
-    print("WAV COMBINED")
+        # Concatenate along the last dimension (time axis)
+        combined_wav = torch.cat((wav1, wav2), dim=2)
+        print("WAV COMBINED")
 
-    mimi_codes = compute_codes(combined_wav)
-    outwav = generate_reponse(mimi_codes)
+        mimi_codes = compute_codes(combined_wav)
+        print("CODES COMPUTED")
+        outwav = generate_reponse(mimi_codes)
+    except Exception as e:
+        return gr.update(value=None), (24000, outwav.squeeze().cpu().numpy()), stream, gr.update(visible=True, value=f"LOG: {e}")
 
-    return gr.update(value=None), (24000, outwav.squeeze().cpu().numpy()), stream
+    return gr.update(value=None), (24000, outwav.squeeze().cpu().numpy()), stream, gr.update(visible=False)
 
 
 with gr.Blocks() as demo:
@@ -150,11 +159,12 @@ Monologue” method significantly improves the linguistic quality of generated s
     output_audio = gr.Audio(label="Processed Audio", streaming=True, autoplay=True)
     stream = gr.State()
 
+    log_out = gr.Textbox("Log", visible=False)
 
     input_audio.stop_recording(
         fn=process_audio,
         inputs=[input_audio, stream],
-        outputs=[input_audio, output_audio, stream]
+        outputs=[input_audio, output_audio, stream, log_out]
     )
 
     with gr.Row():
@@ -173,4 +183,6 @@ Monologue” method significantly improves the linguistic quality of generated s
         elem_id="citation-button",
         show_copy_button=True,
     )
+
+
 demo.launch(debug=True)
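The substance of this commit is an error-surfacing pattern: a `gr.Textbox` that stays hidden until the handler raises, at which point the `except` branch returns `gr.update(visible=True, value=f"LOG: {e}")` into it as an extra output. A minimal, self-contained sketch of that pattern, with a toy handler and illustrative component names (not the ones in app.py), assuming a Gradio version where `gr.update(...)` is available:

```python
import gradio as gr

def risky_handler(text):
    try:
        result = str(1 / float(text))  # may raise ValueError or ZeroDivisionError
        # Success: show the result and keep the log box hidden.
        return result, gr.update(visible=False)
    except Exception as e:
        # Failure: leave the output untouched and reveal the log box
        # with the exception message, mirroring the f"LOG: {e}" update above.
        return gr.update(), gr.update(visible=True, value=f"LOG: {e}")

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Number")
    out = gr.Textbox(label="Reciprocal")
    log_out = gr.Textbox("Log", visible=False)  # hidden until an error occurs
    inp.submit(fn=risky_handler, inputs=inp, outputs=[out, log_out])

demo.launch()
```

Pre-initializing `outwav` and `stream` at the top of `process_audio` serves the same defensive purpose in the commit: the `except` branch can always return placeholder audio and a state value instead of hitting a NameError.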
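The `stream` state follows Gradio's usual `gr.State` round-trip: the same component appears in both `inputs` and `outputs` of `stop_recording`, so whatever `process_audio` returns as its third value becomes the `instream` argument on the next call. A toy sketch of just that round-trip (all names here are illustrative, and `sources=["microphone"]` assumes Gradio 4.x):

```python
import numpy as np
import gradio as gr

def accumulate(audio, history):
    # audio is a (sample_rate, np.ndarray) tuple; history is whatever we
    # returned into the gr.State component last time (None on the first call).
    if audio is None:
        return gr.update(), gr.update(), history
    sr, chunk = audio
    history = chunk if history is None else np.concatenate((history, chunk))
    # Clear the mic, play back everything recorded so far,
    # and write the buffer back into the state for the next turn.
    return gr.update(value=None), (sr, history), history

with gr.Blocks() as demo:
    mic = gr.Audio(sources=["microphone"], type="numpy")
    out = gr.Audio(label="Conversation so far", autoplay=True)
    buf = gr.State()
    mic.stop_recording(fn=accumulate, inputs=[mic, buf], outputs=[mic, out, buf])

demo.launch()
```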