Spaces:
Paused
Paused
Commit
·
2ace8c2
1
Parent(s):
d8213a7
added log
Browse files
app.py
CHANGED
@@ -95,27 +95,36 @@ def convert2wav(audio):
|
|
95 |
##########################################################################################################
|
96 |
|
97 |
def process_audio(audio, instream):
|
|
|
|
|
|
|
|
|
|
|
98 |
print("Audio recieved")
|
99 |
if audio is None:
|
100 |
return gr.update(), instream
|
101 |
|
102 |
-
|
103 |
-
instream
|
104 |
-
|
105 |
-
|
|
|
106 |
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
|
115 |
-
|
116 |
-
|
|
|
|
|
|
|
117 |
|
118 |
-
return gr.update(value=None), (24000, outwav.squeeze().cpu().numpy()), stream
|
119 |
|
120 |
|
121 |
with gr.Blocks() as demo:
|
@@ -150,11 +159,12 @@ Monologue” method significantly improves the linguistic quality of generated s
|
|
150 |
output_audio = gr.Audio(label="Processed Audio", streaming=True, autoplay=True)
|
151 |
stream = gr.State()
|
152 |
|
|
|
153 |
|
154 |
input_audio.stop_recording(
|
155 |
fn=process_audio,
|
156 |
inputs=[input_audio, stream],
|
157 |
-
outputs=[input_audio, output_audio, stream]
|
158 |
)
|
159 |
|
160 |
with gr.Row():
|
@@ -173,4 +183,6 @@ Monologue” method significantly improves the linguistic quality of generated s
|
|
173 |
elem_id="citation-button",
|
174 |
show_copy_button=True,
|
175 |
)
|
|
|
|
|
176 |
demo.launch(debug=True)
|
|
|
95 |
##########################################################################################################
|
96 |
|
97 |
def process_audio(audio, instream):
    """Run one recorded clip through the model and extend the session stream.

    This handler is registered on ``input_audio.stop_recording`` with four
    outputs (``input_audio``, ``output_audio``, ``stream``, ``log_out``), so
    every return path yields exactly four values, in that order.

    Args:
        audio: Value of the input audio component, read as ``audio[0]`` and
            ``audio[1]``; ``None`` when nothing was recorded.
            Presumably ``(sample_rate, samples)`` -- TODO confirm against the
            component configuration.
        instream: Previous value of the ``stream`` state with the same
            two-element layout, or ``None`` on the first call.

    Returns:
        A 4-tuple ``(input_update, output_audio, stream, log_update)``:

        * no recording: no-op updates for the components and the state
          passed through unchanged;
        * success: the input is cleared, the generated waveform is returned
          as ``(24000, ndarray)``, the state holds the concatenated samples
          and the log textbox is hidden;
        * failure: the input is cleared, placeholder audio is returned, the
          state holds the last valid value and the log textbox is shown
          with ``"LOG: <exception>"``.
    """
    print("Audio recieved")
    if audio is None:
        # One value per registered output; the state slot gets the previous
        # state back so an empty recording never alters the session.
        return gr.update(), gr.update(), instream, gr.update()

    # Value written back to the state if processing fails before the new
    # samples are appended: always the last valid state, never a placeholder.
    stream = instream
    try:
        if instream is None:
            # NOTE(review): the first call seeds the history with 10 s of
            # random samples at 24 kHz -- looks like a placeholder; confirm.
            instream = (24000, torch.randn(1, 1, 24000 * 10).squeeze().cpu().numpy())
        print("STREAM RECIEVED")
        # NOTE(review): the new state takes audio[0] as its first element
        # while the samples of both clips are joined as-is; this assumes
        # both clips share one sample rate -- confirm.
        stream = (audio[0], np.concatenate((instream[1], audio[1])))

        # Assuming instream and audio are valid inputs for convert2wav
        wav1 = convert2wav(instream)
        wav2 = convert2wav(audio)

        # Concatenate along the last dimension (time axis)
        combined_wav = torch.cat((wav1, wav2), dim=2)
        print("WAV COMBINED")

        mimi_codes = compute_codes(combined_wav)
        print("CODES COMPUTED")
        outwav = generate_reponse(mimi_codes)
    except Exception as e:
        # UI boundary: surface the error in the log textbox instead of
        # crashing the event handler. The placeholder output is only built
        # here, when it is actually needed.
        fallback_wav = torch.randn(1, 1, 24000 * 10)
        return (
            gr.update(value=None),
            (24000, fallback_wav.squeeze().cpu().numpy()),
            stream,
            gr.update(visible=True, value=f"LOG: {e}"),
        )

    return (
        gr.update(value=None),
        (24000, outwav.squeeze().cpu().numpy()),
        stream,
        gr.update(visible=False),
    )
|
128 |
|
129 |
|
130 |
with gr.Blocks() as demo:
|
|
|
159 |
output_audio = gr.Audio(label="Processed Audio", streaming=True, autoplay=True)
|
160 |
stream = gr.State()
|
161 |
|
162 |
+
log_out = gr.Textbox("Log", visible=False)
|
163 |
|
164 |
input_audio.stop_recording(
|
165 |
fn=process_audio,
|
166 |
inputs=[input_audio, stream],
|
167 |
+
outputs=[input_audio, output_audio, stream, log_out]
|
168 |
)
|
169 |
|
170 |
with gr.Row():
|
|
|
183 |
elem_id="citation-button",
|
184 |
show_copy_button=True,
|
185 |
)
|
186 |
+
|
187 |
+
|
188 |
demo.launch(debug=True)
|