Gregniuki committed

Commit 50ec890 · Parent: 2a20232

Update app.py

Files changed (1): app.py (+10, -23)
app.py CHANGED
@@ -90,7 +90,8 @@ def detect_onnx_models(path):
     else:
         return None
 
-@app.post("/synthesize")
+#@app.post("/synthesize")
+@app.post("/synthesize", response_class=FileResponse)
 async def main(
     request: Request,
     text_input: str = Form(...),
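
Note: response_class=FileResponse switches the endpoint from the old JSON reply to returning raw audio; the decorator argument mainly documents the response type (e.g. in the generated OpenAPI schema), while the actual file is whatever the handler returns. A minimal sketch of the pattern, assuming FastAPI as used in app.py; the handler name synthesize_demo and the fixed path are illustrative, not from this repo:

    from fastapi import FastAPI, Form
    from fastapi.responses import FileResponse

    app = FastAPI()

    @app.post("/synthesize", response_class=FileResponse)
    async def synthesize_demo(text_input: str = Form(...)):
        # Assumed to have been written by the TTS step; placeholder path.
        wav_path = "/tmp/generated_audio.wav"
        return FileResponse(wav_path, media_type="audio/wav")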
@@ -128,22 +129,9 @@ async def main(
             speaker_selection.options = config["speaker_id_map"].values()
             speaker_selection.layout.visibility = 'visible'
             preview_sid = 0
-            if enhanced_accessibility:
-                playaudio("multispeaker")
         else:
             speaker_selection.layout.visibility = 'hidden'
             preview_sid = None
-
-        if enhanced_accessibility:
-            inferencing(
-                model,
-                config,
-                preview_sid,
-                lan.translate(
-                    config["espeak"]["voice"][:2],
-                    "Interface openned. Write your texts, configure the different synthesis options or download all the voices you want. Enjoy!"
-                )
-            )
     else:
         voice_model_names = []
         for current in onnx_models:
@@ -199,8 +187,11 @@ async def main(
     auto_play = play.value
     inferencing(model, config, sid, text, rate, noise_scale, noise_scale_w, auto_play)
 
-
-    return {"message": f"Text to synthesize: {text_input}, Speed: {speed_slider}, Play: {play}"}
+    # Save the audio as a temporary WAV file
+    temp_audio_path = os.path.join(tempfile.gettempdir(), "generated_audio.wav")
+    sf.write(temp_audio_path, merged_audio, config["audio"]["sample_rate"])
+    return FileResponse(temp_audio_path)
+    # return {"message": f"Text to synthesize: {text_input}, Speed: {speed_slider}, Play: {play}"}
 
 def load_onnx(model, sess_options, providers = ["CPUExecutionProvider"]):
     _LOGGER.debug("Loading model from %s", model)
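
Note: the added lines write every request to the same fixed file in tempfile.gettempdir(), so concurrent requests can overwrite each other's audio before it is served; also, as committed, the result of inferencing(...) is never assigned, so merged_audio has to reach main() some other way (e.g. a module-level variable) for these lines to run. A hedged per-request variant; wav_response is a hypothetical helper, not a function in app.py:

    import tempfile

    import soundfile as sf
    from fastapi.responses import FileResponse

    def wav_response(merged_audio, sample_rate):
        # One file per request instead of a shared "generated_audio.wav".
        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        sf.write(tmp.name, merged_audio, sample_rate)  # same soundfile call app.py uses
        return FileResponse(tmp.name, media_type="audio/wav")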
@@ -249,8 +240,8 @@ def phonemes_to_ids(config, phonemes: List[str]) -> List[int]:
     ids.extend(id_map[PAD])
     ids.extend(id_map[EOS])
     return ids
-@app.get("/generate_audio")
-async def inferencing(model, config, sid, line, length_scale = 1, noise_scale = 0.667, noise_scale_w = 0.8, auto_play=True):
+
+def inferencing(model, config, sid, line, length_scale = 1, noise_scale = 0.667, noise_scale_w = 0.8, auto_play=True):
     audios = []
     if config["phoneme_type"] == "PhonemeType.ESPEAK":
         config["phoneme_type"] = "espeak"
@@ -284,12 +275,8 @@ async def inferencing(model, config, sid, line, length_scale = 1, noise_scale =
         audios.append(audio)
     merged_audio = np.concatenate(audios)
     sample_rate = config["audio"]["sample_rate"]
-    # Save the audio as a temporary WAV file
-    temp_audio_path = os.path.join(tempfile.gettempdir(), "generated_audio.wav")
-    sf.write(temp_audio_path, merged_audio, config["audio"]["sample_rate"])
 
     # Return the audio file as a FastAPI response
-    return FileResponse(temp_audio_path)
     # display(Markdown(f"{line}"))
     # display(Audio(merged_audio, rate=sample_rate, autoplay=auto_play))
 
@@ -395,7 +382,7 @@ async def read_root(request: Request):
 
 
 if __name__ == "__main__":
-    main()
+    # main()
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=7860)
     # main()
 
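After this commit, POSTing the form to /synthesize returns a WAV file. A hypothetical client call, assuming the server is running on port 7860 as configured above; only text_input is visible in the diffed signature, so any further form fields are omitted here:

    import requests

    resp = requests.post(
        "http://localhost:7860/synthesize",
        data={"text_input": "Hello world"},  # form field from the diff
    )
    with open("generated_audio.wav", "wb") as f:
        f.write(resp.content)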