mrestrepo committed on
Commit
b308ed7
·
1 Parent(s): 33c11bc

Update: Use TTS.API for voice generation

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.wav filter=lfs diff=lfs merge=lfs -text
.gitignore CHANGED
@@ -2,6 +2,8 @@
2
  output.wav
3
  /venv/
4
  venv/
 
 
5
  __pycache__/
6
 
7
  audios/
 
2
  output.wav
3
  /venv/
4
  venv/
5
+ /.venv/
6
+ .venv/
7
  __pycache__/
8
 
9
  audios/
app.py CHANGED
@@ -1,66 +1,46 @@
1
- import os
2
  import gradio as gr
3
  import uuid
4
  import requests
5
- import pathlib
6
- from elevenlabs.client import ElevenLabs, AsyncElevenLabs
7
- from elevenlabs import play, save, Voice, stream
8
- from dotenv import load_dotenv
9
  from datetime import timedelta
10
- from gcloud import storage
 
11
 
12
 
13
- load_dotenv()
14
- KEY_ELEVENLABS = os.getenv('ELEVENLABS_KEY')
 
 
15
 
16
 
17
  async def generate_audio(text_input: str, creator: str) -> str:
18
 
19
- voice_id = ''
 
20
 
 
21
  match creator:
22
  case 'Roomie':
23
- voice_id = '2Onew6n5JwT9uEbmTSrO'
 
24
  case 'Xavy':
25
- voice_id = 'cYBsY94mzMC7VpGoVMgr'
 
26
  case 'Bella':
27
- voice_id = 'X9j5sAaRD6aEgBblOUOG'
28
-
29
- CLIENT_ELEVENLABS = ElevenLabs(api_key=KEY_ELEVENLABS)
30
- VOICE_CREATOR = Voice(voice_id=voice_id)
31
- CREDENTIALS_GCP = os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
32
- NAME_BUCKET = os.getenv("NAME_BUCKET")
33
- unique_id = str(uuid.uuid4())
34
-
35
- STORAGE_CLIENT = storage.Client.from_service_account_json(CREDENTIALS_GCP)
36
 
37
- audio = CLIENT_ELEVENLABS.generate(
38
- text=text_input,
39
- voice=VOICE_CREATOR,
40
- model="eleven_multilingual_v2"
41
- )
42
 
43
- source_audio_file_name = f'./audios/file_audio_{unique_id}.wav'
 
 
 
 
 
44
 
45
- try:
46
- save(audio, source_audio_file_name)
47
- except Exception as e:
48
- print(e)
49
-
50
- destination_blob_name_audio = unique_id + '.wav'
51
-
52
- bucket = STORAGE_CLIENT.bucket(NAME_BUCKET)
53
- blob = bucket.blob(destination_blob_name_audio)
54
- try:
55
- blob.upload_from_filename(source_audio_file_name)
56
- except Exception as e:
57
- print(e)
58
-
59
- try:
60
- url_expiration = timedelta(minutes=15)
61
- signed_url_audio = blob.generate_signed_url(expiration=url_expiration)
62
- except Exception as e:
63
- print(e)
64
 
65
  return gr.Audio(value=source_audio_file_name)
66
 
@@ -68,7 +48,8 @@ async def generate_audio(text_input: str, creator: str) -> str:
68
  app = gr.Interface(
69
  fn=generate_audio,
70
  inputs=[gr.Textbox(label='Text to Speach'), gr.Dropdown(
71
- ['Roomie', 'Xavy', 'Bella'], label="Coice your creator")],
 
72
  outputs=['audio']
73
  )
74
 
 
 
1
  import gradio as gr
2
  import uuid
3
  import requests
 
 
 
 
4
  from datetime import timedelta
5
+ from TTS.api import TTS
6
+ import locale
7
 
8
 
9
+ locale.getpreferredencoding = lambda: "UTF-8"
10
+
11
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=True)
12
+ tts = TTS("xtts_v2.0.2", gpu=True)
13
 
14
 
15
  async def generate_audio(text_input: str, creator: str) -> str:
16
 
17
+ refer_voices = ''
18
+ unique_id = str(uuid.uuid4())
19
 
20
+ output_file = f'{unique_id}.wav'
21
  match creator:
22
  case 'Roomie':
23
+ refer_voices = ["./assets/roomie/roomie_emocionado_base_1.wav", "./assets/roomie/ref_12.wav", "./assets/roomie/ref_11.wav", "./assets/roomie/ref_10.wav",
24
+ "./assets/roomie/ref_1.wav", "./assets/roomie/ref_6.wav", "./assets/roomie/ref_7.wav", "./assets/roomie/ref_8.wav", "./assets/roomie/roomie_emocionado_base_2.wav",]
25
  case 'Xavy':
26
+ refer_voices = ["./assets/xavy/neutro_3.wav",
27
+ "./assets/xavy/neutro_1.wav", "./assets/xavy/neutro_2.wav"]
28
  case 'Bella':
29
+ refer_voices = ["./assets/bella/neutro_2.wav",
30
+ "./assets/bella/neutro_1.wav", "./assets/bella/neutro_3.wav"]
 
 
 
 
 
 
 
31
 
32
+ case 'Julia':
33
+ refer_voices = ["assets/julia/neutro_4_Final_fast.wav", "assets/julia/enfadado_1_Final.wav", "assets/julia/enfadado_2_Final.wav",
34
+ "assets/julia/enfadado_3_Final.wav", "assets/julia/emocionada_1.wav", "assets/julia/emocionada_2_Final.wav"]
 
 
35
 
36
+ tts.tts_to_file(text=text_input,
37
+ file_path=output_file,
38
+ speaker_wav=refer_voices,
39
+ language="en",
40
+ split_sentences=True,
41
+ )
42
 
43
+ source_audio_file_name = output_file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
  return gr.Audio(value=source_audio_file_name)
46
 
 
48
  app = gr.Interface(
49
  fn=generate_audio,
50
  inputs=[gr.Textbox(label='Text to Speach'), gr.Dropdown(
51
+ ['Roomie', 'Xavy', 'Bella', 'Julia'], label="Coice your creator")],
52
+
53
  outputs=['audio']
54
  )
55
 
assets/bella/en/neutro_1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98dc1a8dd3a2ba8e161689b7b57b71f97134878fd67c955293587c82b17f0d3e
3
+ size 7845662
assets/bella/en/neutro_2.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df483d6b4c938839f4cfb1be42dc06f3d31b1cdc742911aa7e774c765a50a543
3
+ size 6258690
assets/bella/en/neutro_3.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3ede7b64c845e13c86a5c7a42288edb0b65d0d6f8fe74ff1afcf2ab723da346
3
+ size 5256176
assets/julia/emocionada_1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe37c7fa25dbfbc506a7499bb97c407fdd610731c7ca2b8ba8925212523d1729
3
+ size 1658984
assets/julia/emocionada_2_Final.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82c875ee37816301c854f68c87455b0912d81155841b05bbcd5ded572e8ebfe3
3
+ size 1440008
assets/julia/enfadado_1_Final.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4acb29543f94308279d6f9b570bcbff6b27001e2352e2757aedb68816cf40b66
3
+ size 1349096
assets/julia/enfadado_2_Final.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:583c19a521f2e92a6b131661e1a82cbe9e82e9e687eb58c1c6408f2c9bbc7c21
3
+ size 1314194
assets/julia/enfadado_3_Final.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:101e941d79be49e530b81c973328974857227ddef1d9207bff41396fbfe49c4b
3
+ size 1375176
assets/julia/neutro_4_Final_fast.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:601ff8eba95b1d6eb0bbea865cf0bef531d3ee846df86b72bf62ca5a0dfac9f1
3
+ size 1164208
assets/roomie/ref_1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:536a06686422eeef5cae532de4e8eeaf371b0156bf3965e8965978d09ad7898f
3
+ size 2610862
assets/roomie/ref_10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62e43e0fd5a524b7ef4d0071c8e6d49052cf9deef9cfd700149ecc04393ec472
3
+ size 3880742
assets/roomie/ref_11.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04e017bf4772a3fff0a8550249bc1871d9f35be8ae41189af11d2c6228b7b3f5
3
+ size 4156764
assets/roomie/ref_12.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c5ecdc575c16e32f399f1739c8eb614a1225467ca179e2519dff4986ac6011f
3
+ size 4897922
assets/roomie/ref_6.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f44787ce4f3c35180a07df2276a5aa02f033d5840497e8a88446abfd39e29f6a
3
+ size 1073680
assets/roomie/ref_7.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed002bfc9ce7f28e7f4d1784383983f81a9315e19dd6cc68e24d41f755fba8f7
3
+ size 1046772
assets/roomie/ref_8.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e0ea7f7d46ba0d676d2b7ecc411445993be4d44dd5c4ca4e8d62af0887259c0
3
+ size 1091886
assets/roomie/roomie_emocionado_base_1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57b7c1003bf1d80f943cbf5646d93fdc81644e986e95cde658629532cc0d5f6d
3
+ size 1941612
assets/roomie/roomie_emocionado_base_2.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6f95ab42e4ca4f8e0b4a353f9bd1892d5fed41626bec212b0369e8fa048f002
3
+ size 2073488
assets/xavy/neutro_1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05817d73012f2dd9be6b43873ccdc17619719b451e31c5f47e2412f77779921f
3
+ size 4564004
assets/xavy/neutro_2.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ec5a3ed875d08961e6bb81fcf9884882f2b8c27633dba427445e109ca0702d9
3
+ size 5531872
assets/xavy/neutro_3.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8367089cd45b9aa1e68be518638a84fe26a03fbe0966d561dbe658de891536fc
3
+ size 6015018
audios/file_audio_aa99e44b-8b91-4571-a73f-25a7b701b653.wav CHANGED
Binary files a/audios/file_audio_aa99e44b-8b91-4571-a73f-25a7b701b653.wav and b/audios/file_audio_aa99e44b-8b91-4571-a73f-25a7b701b653.wav differ
 
requirements.txt CHANGED
@@ -1,6 +1,197 @@
1
- requests==2.31.0
2
- pathlib==1.0.1
3
- python-dotenv==1.0.1
4
- gradio==4.15.0
5
- elevenlabs==1.1.2
6
- gcloud==0.18.3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.1.0
2
+ aiofiles==23.2.1
3
+ aiohappyeyeballs==2.4.0
4
+ aiohttp==3.10.5
5
+ aiosignal==1.3.1
6
+ annotated-types==0.7.0
7
+ anyascii==0.3.2
8
+ anyio==4.4.0
9
+ asttokens==2.4.1
10
+ attrs==24.2.0
11
+ audioread==3.0.1
12
+ babel==2.16.0
13
+ backcall==0.2.0
14
+ bangla==0.0.2
15
+ beautifulsoup4==4.12.3
16
+ bleach==6.1.0
17
+ blinker==1.8.2
18
+ blis==0.7.11
19
+ bnnumerizer==0.0.2
20
+ bnunicodenormalizer==0.1.7
21
+ catalogue==2.0.10
22
+ certifi==2024.8.30
23
+ cffi==1.17.1
24
+ charset-normalizer==3.3.2
25
+ click==8.1.7
26
+ cloudpathlib==0.19.0
27
+ colorama==0.4.6
28
+ confection==0.1.5
29
+ contourpy==1.3.0
30
+ coqpit==0.0.17
31
+ cycler==0.12.1
32
+ cymem==2.0.8
33
+ Cython==3.0.11
34
+ dateparser==1.1.8
35
+ decorator==5.1.1
36
+ defusedxml==0.7.1
37
+ docopt==0.6.2
38
+ einops==0.8.0
39
+ encodec==0.1.1
40
+ executing==2.1.0
41
+ fastapi==0.112.2
42
+ fastjsonschema==2.20.0
43
+ ffmpy==0.4.0
44
+ filelock==3.15.4
45
+ Flask==3.0.3
46
+ fonttools==4.53.1
47
+ frozenlist==1.4.1
48
+ fsspec==2024.9.0
49
+ g2pkk==0.1.2
50
+ gradio==4.42.0
51
+ gradio_client==1.3.0
52
+ grpcio==1.66.1
53
+ gruut==2.2.3
54
+ gruut-ipa==0.13.0
55
+ gruut-lang-de==2.0.1
56
+ gruut-lang-en==2.0.1
57
+ gruut-lang-es==2.0.1
58
+ gruut-lang-fr==2.0.2
59
+ h11==0.14.0
60
+ hangul-romanize==0.1.0
61
+ httpcore==1.0.5
62
+ httpx==0.27.2
63
+ huggingface-hub==0.24.6
64
+ idna==3.8
65
+ importlib_resources==6.4.4
66
+ inflect==7.3.1
67
+ ipython==8.12.3
68
+ itsdangerous==2.2.0
69
+ jamo==0.4.1
70
+ jedi==0.19.1
71
+ jieba==0.42.1
72
+ Jinja2==3.1.4
73
+ joblib==1.4.2
74
+ jsonlines==1.2.0
75
+ jsonschema==4.23.0
76
+ jsonschema-specifications==2023.12.1
77
+ jupyter_client==8.6.2
78
+ jupyter_core==5.7.2
79
+ jupyterlab_pygments==0.3.0
80
+ kiwisolver==1.4.7
81
+ langcodes==3.4.0
82
+ language_data==1.2.0
83
+ lazy_loader==0.4
84
+ librosa==0.10.2.post1
85
+ llvmlite==0.43.0
86
+ marisa-trie==1.2.0
87
+ Markdown==3.7
88
+ markdown-it-py==3.0.0
89
+ MarkupSafe==2.1.5
90
+ matplotlib==3.9.2
91
+ matplotlib-inline==0.1.7
92
+ mdurl==0.1.2
93
+ mistune==3.0.2
94
+ more-itertools==10.4.0
95
+ mpmath==1.3.0
96
+ msgpack==1.0.8
97
+ multidict==6.0.5
98
+ murmurhash==1.0.10
99
+ nbclient==0.10.0
100
+ nbconvert==7.16.4
101
+ nbformat==5.10.4
102
+ networkx==2.8.8
103
+ nltk==3.9.1
104
+ num2words==0.5.13
105
+ numba==0.60.0
106
+ numpy==1.26.4
107
+ orjson==3.10.7
108
+ packaging==24.1
109
+ pandas==1.5.3
110
+ pandocfilters==1.5.1
111
+ parso==0.8.4
112
+ pickleshare==0.7.5
113
+ pillow==10.4.0
114
+ pipreqs==0.5.0
115
+ platformdirs==4.2.2
116
+ pooch==1.8.2
117
+ preshed==3.0.9
118
+ prompt_toolkit==3.0.47
119
+ protobuf==5.28.0
120
+ psutil==6.0.0
121
+ pure_eval==0.2.3
122
+ pycparser==2.22
123
+ pydantic==2.8.2
124
+ pydantic_core==2.20.1
125
+ pydub==0.25.1
126
+ Pygments==2.18.0
127
+ pynndescent==0.5.13
128
+ pyparsing==3.1.4
129
+ pypinyin==0.52.0
130
+ pysbd==0.3.4
131
+ python-crfsuite==0.9.10
132
+ python-dateutil==2.9.0.post0
133
+ python-multipart==0.0.9
134
+ pytz==2024.1
135
+ pywin32==306
136
+ PyYAML==6.0.2
137
+ pyzmq==26.2.0
138
+ referencing==0.35.1
139
+ regex==2024.7.24
140
+ requests==2.32.3
141
+ rich==13.8.0
142
+ rpds-py==0.20.0
143
+ ruff==0.6.3
144
+ safetensors==0.4.4
145
+ scikit-learn==1.5.1
146
+ scipy==1.14.1
147
+ semantic-version==2.10.0
148
+ shellingham==1.5.4
149
+ six==1.16.0
150
+ smart-open==7.0.4
151
+ sniffio==1.3.1
152
+ soundfile==0.12.1
153
+ soupsieve==2.6
154
+ soxr==0.5.0.post1
155
+ spacy==3.7.6
156
+ spacy-legacy==3.0.12
157
+ spacy-loggers==1.0.5
158
+ srsly==2.4.8
159
+ stack-data==0.6.3
160
+ starlette==0.38.4
161
+ SudachiDict-core==20240716
162
+ SudachiPy==0.6.8
163
+ sympy==1.12
164
+ tensorboard==2.17.1
165
+ tensorboard-data-server==0.7.2
166
+ thinc==8.2.5
167
+ threadpoolctl==3.5.0
168
+ tinycss2==1.3.0
169
+ tokenizers==0.19.1
170
+ tomlkit==0.12.0
171
+ torch==2.4.1+cu118
172
+ torchaudio==2.4.1+cu118
173
+ torchvision==0.19.1+cu118
174
+ tornado==6.4.1
175
+ tqdm==4.66.5
176
+ trainer==0.0.36
177
+ traitlets==5.14.3
178
+ transformers==4.44.2
179
+ TTS==0.22.0
180
+ typeguard==4.3.0
181
+ typer==0.12.5
182
+ typing_extensions==4.12.2
183
+ tzdata==2024.1
184
+ tzlocal==5.2
185
+ umap-learn==0.5.6
186
+ Unidecode==1.3.8
187
+ urllib3==2.2.2
188
+ uvicorn==0.30.6
189
+ wasabi==1.1.3
190
+ wcwidth==0.2.13
191
+ weasel==0.4.1
192
+ webencodings==0.5.1
193
+ websockets==12.0
194
+ Werkzeug==3.0.4
195
+ wrapt==1.16.0
196
+ yarg==0.1.9
197
+ yarl==1.9.9