Update app.py
app.py CHANGED
@@ -90,7 +90,8 @@ def detect_onnx_models(path):
     else:
         return None
 
-
+#@app.post("/synthesize")
+@app.post("/synthesize", response_class=FileResponse)
 async def main(
     request: Request,
     text_input: str = Form(...),
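
For context, a minimal client-side sketch of how the new /synthesize endpoint could be exercised once the app is running (the uvicorn.run call at the bottom of the file binds port 7860). This example is an assumption, not part of the commit: it treats text_input as the only required form field, but main() may declare further Form(...) parameters that this diff does not show.

import requests  # hypothetical client script, not part of app.py

resp = requests.post(
    "http://localhost:7860/synthesize",
    data={"text_input": "Hello from the synthesizer!"},  # other Form fields may be required
)
resp.raise_for_status()

# The endpoint returns a FileResponse wrapping a WAV file.
with open("synthesized.wav", "wb") as f:
    f.write(resp.content)
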
@@ -128,22 +129,9 @@ async def main(
             speaker_selection.options = config["speaker_id_map"].values()
             speaker_selection.layout.visibility = 'visible'
             preview_sid = 0
-            if enhanced_accessibility:
-                playaudio("multispeaker")
         else:
             speaker_selection.layout.visibility = 'hidden'
             preview_sid = None
-
-        if enhanced_accessibility:
-            inferencing(
-                model,
-                config,
-                preview_sid,
-                lan.translate(
-                    config["espeak"]["voice"][:2],
-                    "Interface openned. Write your texts, configure the different synthesis options or download all the voices you want. Enjoy!"
-                )
-            )
     else:
         voice_model_names = []
         for current in onnx_models:
@@ -199,8 +187,11 @@ async def main(
     auto_play = play.value
     inferencing(model, config, sid, text, rate, noise_scale, noise_scale_w, auto_play)
 
-
-
+    # Save the audio as a temporary WAV file
+    temp_audio_path = os.path.join(tempfile.gettempdir(), "generated_audio.wav")
+    sf.write(temp_audio_path, merged_audio, config["audio"]["sample_rate"])
+    return FileResponse(temp_audio_path)
+    # return {"message": f"Text to synthesize: {text_input}, Speed: {speed_slider}, Play: {play}"}
 
 def load_onnx(model, sess_options, providers = ["CPUExecutionProvider"]):
     _LOGGER.debug("Loading model from %s", model)
@@ -249,8 +240,8 @@ def phonemes_to_ids(config, phonemes: List[str]) -> List[int]:
     ids.extend(id_map[PAD])
     ids.extend(id_map[EOS])
     return ids
-
-async def inferencing(model, config, sid, line, length_scale = 1, noise_scale = 0.667, noise_scale_w = 0.8, auto_play=True):
+
+def inferencing(model, config, sid, line, length_scale = 1, noise_scale = 0.667, noise_scale_w = 0.8, auto_play=True):
     audios = []
     if config["phoneme_type"] == "PhonemeType.ESPEAK":
         config["phoneme_type"] = "espeak"
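
This hunk drops async from inferencing's signature (the old async def is still visible in the next hunk's header). That matches the call sites: main() invokes inferencing(...) without await, and calling an async def function without awaiting it only creates a coroutine object; none of its body runs. A minimal sketch of the pitfall, using stand-in functions rather than the app's own:

async def make_audio_async():
    print("synthesizing...")  # never reached without an await
    return [0.0, 0.1]

def make_audio_sync():
    print("synthesizing...")
    return [0.0, 0.1]

result = make_audio_async()  # no await: just a coroutine object, nothing runs
print(type(result))          # <class 'coroutine'>
result.close()               # silence the "never awaited" RuntimeWarning

audio = make_audio_sync()    # plain def: the body runs immediately
print(audio)                 # [0.0, 0.1]
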
@@ -284,12 +275,8 @@ async def inferencing(model, config, sid, line, length_scale = 1, noise_scale =
         audios.append(audio)
     merged_audio = np.concatenate(audios)
     sample_rate = config["audio"]["sample_rate"]
-    # Save the audio as a temporary WAV file
-    temp_audio_path = os.path.join(tempfile.gettempdir(), "generated_audio.wav")
-    sf.write(temp_audio_path, merged_audio, config["audio"]["sample_rate"])
 
     # Return the audio file as a FastAPI response
-    return FileResponse(temp_audio_path)
     # display(Markdown(f"{line}"))
     # display(Audio(merged_audio, rate=sample_rate, autoplay=auto_play))
 
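
Taken together with the save-and-return block added in the @@ -199,8 +187,11 @@ hunk above, this move has two caveats worth flagging; the sketch below is a possible variation under stated assumptions, not the author's code. First, merged_audio is assigned inside inferencing, so main() can only write it out if inferencing returns it. Second, a fixed generated_audio.wav in tempfile.gettempdir() can be overwritten when two requests are served concurrently; a unique temp file per request avoids that.

# Sketch: assumes inferencing() is changed to end with
#     return merged_audio, sample_rate
# so that the endpoint can write the file itself.
import tempfile

import soundfile as sf
from fastapi.responses import FileResponse

def write_and_respond(merged_audio, sample_rate):
    # Hypothetical helper for the tail of main(); NamedTemporaryFile with
    # delete=False yields a distinct path per request instead of a shared one.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    sf.write(tmp.name, merged_audio, sample_rate)
    return FileResponse(tmp.name, media_type="audio/wav")
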
@@ -395,7 +382,7 @@ async def read_root(request: Request):
 
 
 if __name__ == "__main__":
-
+    # main()
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)
     # main()