mrq
committed on
Commit
·
b12aaf0
1
Parent(s):
33f0c95
- app.py +6 -2
- requirements.txt +1 -1
app.py
CHANGED
@@ -127,6 +127,9 @@ def get_speakers():
|
|
127 |
def get_languages():
|
128 |
return list(get_lang_symmap().keys()) + ["auto"]
|
129 |
|
|
|
|
|
|
|
130 |
#@gradio_wrapper(inputs=layout["dataset"]["inputs"].keys())
|
131 |
def load_sample( speaker ):
|
132 |
metadata_path = cfg.metadata_dir / f'{speaker}.json'
|
@@ -208,7 +211,7 @@ def do_inference_tts( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
|
|
208 |
parser = argparse.ArgumentParser(allow_abbrev=False, add_help=False)
|
209 |
# I'm very sure I can procedurally generate this list
|
210 |
parser.add_argument("--text", type=str, default=kwargs["text"])
|
211 |
-
parser.add_argument("--task", type=str, default="
|
212 |
parser.add_argument("--modality", type=str, default=kwargs["modality"])
|
213 |
parser.add_argument("--references", type=str, default=kwargs["reference"])
|
214 |
parser.add_argument("--voice-convert", type=str, default=kwargs["voice-convert"])
|
@@ -336,7 +339,7 @@ def do_inference_stt( progress=gr.Progress(track_tqdm=True), *args, **kwargs ):
|
|
336 |
|
337 |
parser = argparse.ArgumentParser(allow_abbrev=False, add_help=False)
|
338 |
# I'm very sure I can procedurally generate this list
|
339 |
-
parser.add_argument("--task", type=str, default="
|
340 |
parser.add_argument("--references", type=str, default=kwargs["reference"])
|
341 |
parser.add_argument("--max-duration", type=int, default=0)
|
342 |
parser.add_argument("--language", type=str, default=kwargs["language"])
|
@@ -460,6 +463,7 @@ with ui:
|
|
460 |
with gr.Row():
|
461 |
layout["inference_tts"]["inputs"]["text-language"] = gr.Dropdown(choices=get_languages(), label="Language (Text)", value="auto", info="Language the input text is in.")
|
462 |
layout["inference_tts"]["inputs"]["language"] = gr.Dropdown(choices=get_languages(), label="Language (Output)", value="auto", info="Target language/accent to output.")
|
|
|
463 |
with gr.Row():
|
464 |
layout["inference_tts"]["inputs"]["split-text-by"] = gr.Dropdown(choices=["sentences", "lines"], label="Text Delimiter", info="How to split the text into utterances.", value="sentences")
|
465 |
layout["inference_tts"]["inputs"]["context-history"] = gr.Slider(value=0, minimum=0, maximum=4, step=1, label="(Rolling) Context History", info="How many prior lines to serve as the context/prefix (0 to disable).")
|
|
|
127 |
def get_languages():
|
128 |
return list(get_lang_symmap().keys()) + ["auto"]
|
129 |
|
130 |
+
def get_tasks():
|
131 |
+
return ["tts", "sr", "nr", "vc"]
|
132 |
+
|
133 |
#@gradio_wrapper(inputs=layout["dataset"]["inputs"].keys())
|
134 |
def load_sample( speaker ):
|
135 |
metadata_path = cfg.metadata_dir / f'{speaker}.json'
|
|
|
211 |
parser = argparse.ArgumentParser(allow_abbrev=False, add_help=False)
|
212 |
# I'm very sure I can procedurally generate this list
|
213 |
parser.add_argument("--text", type=str, default=kwargs["text"])
|
214 |
+
parser.add_argument("--task", type=str, default=kwargs["task"])
|
215 |
parser.add_argument("--modality", type=str, default=kwargs["modality"])
|
216 |
parser.add_argument("--references", type=str, default=kwargs["reference"])
|
217 |
parser.add_argument("--voice-convert", type=str, default=kwargs["voice-convert"])
|
|
|
339 |
|
340 |
parser = argparse.ArgumentParser(allow_abbrev=False, add_help=False)
|
341 |
# I'm very sure I can procedurally generate this list
|
342 |
+
parser.add_argument("--task", type=str, default="stt")
|
343 |
parser.add_argument("--references", type=str, default=kwargs["reference"])
|
344 |
parser.add_argument("--max-duration", type=int, default=0)
|
345 |
parser.add_argument("--language", type=str, default=kwargs["language"])
|
|
|
463 |
with gr.Row():
|
464 |
layout["inference_tts"]["inputs"]["text-language"] = gr.Dropdown(choices=get_languages(), label="Language (Text)", value="auto", info="Language the input text is in.")
|
465 |
layout["inference_tts"]["inputs"]["language"] = gr.Dropdown(choices=get_languages(), label="Language (Output)", value="auto", info="Target language/accent to output.")
|
466 |
+
layout["inference_tts"]["inputs"]["task"] = gr.Dropdown(choices=get_tasks(), label="Task", value="tts", info="")
|
467 |
with gr.Row():
|
468 |
layout["inference_tts"]["inputs"]["split-text-by"] = gr.Dropdown(choices=["sentences", "lines"], label="Text Delimiter", info="How to split the text into utterances.", value="sentences")
|
469 |
layout["inference_tts"]["inputs"]["context-history"] = gr.Slider(value=0, minimum=0, maximum=4, step=1, label="(Rolling) Context History", info="How many prior lines to serve as the context/prefix (0 to disable).")
|
requirements.txt
CHANGED
@@ -4,4 +4,4 @@ torchaudio
|
|
4 |
sageattention==1.0.6
|
5 |
pykakasi
|
6 |
|
7 |
-
vall_e @ git+https://github.com/e-c-k-e-r/vall-e.git@
|
|
|
4 |
sageattention==1.0.6
|
5 |
pykakasi
|
6 |
|
7 |
+
vall_e @ git+https://github.com/e-c-k-e-r/vall-e.git@59bf6b8b3338736cfa38bf888dc0730867370846
|