Spaces:
Runtime error
Runtime error
divakaivan
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -4,12 +4,6 @@ import numpy as np
|
|
4 |
import torch
|
5 |
|
6 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
7 |
-
|
8 |
-
#.
|
9 |
-
checkpoint = "microsoft/speecht5_tts"
|
10 |
-
processor = SpeechT5Processor.from_pretrained(checkpoint)
|
11 |
-
model = SpeechT5ForTextToSpeech.from_pretrained("divakaivan/glaswegian_tts")
|
12 |
-
|
13 |
from datasets import load_dataset, Audio
|
14 |
|
15 |
dataset = load_dataset(
|
@@ -21,7 +15,7 @@ dataset = dataset.cast_column("audio", Audio(sampling_rate=16000))['train']
|
|
21 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
|
22 |
|
23 |
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
|
24 |
-
model = SpeechT5ForTextToSpeech.from_pretrained("
|
25 |
|
26 |
tokenizer = processor.tokenizer
|
27 |
|
@@ -136,7 +130,7 @@ def predict(text, speaker):
|
|
136 |
return (16000, speech)
|
137 |
|
138 |
|
139 |
-
title = "
|
140 |
|
141 |
description = """
|
142 |
The <b>SpeechT5</b> model is pre-trained on text as well as speech inputs, with targets that are also a mix of text and speech.
|
@@ -169,28 +163,11 @@ article = """
|
|
169 |
</div>
|
170 |
"""
|
171 |
|
172 |
-
examples = [
|
173 |
-
["It is not in the stars to hold our destiny but in ourselves.", "BDL (male)"],
|
174 |
-
["The octopus and Oliver went to the opera in October.", "CLB (female)"],
|
175 |
-
["She sells seashells by the seashore. I saw a kitten eating chicken in the kitchen.", "RMS (male)"],
|
176 |
-
["Brisk brave brigadiers brandished broad bright blades, blunderbusses, and bludgeons—balancing them badly.", "SLT (female)"],
|
177 |
-
["A synonym for cinnamon is a cinnamon synonym.", "BDL (male)"],
|
178 |
-
["How much wood would a woodchuck chuck if a woodchuck could chuck wood? He would chuck, he would, as much as he could, and chuck as much wood as a woodchuck would if a woodchuck could chuck wood.", "CLB (female)"],
|
179 |
-
]
|
180 |
|
181 |
gr.Interface(
|
182 |
fn=predict,
|
183 |
inputs=[
|
184 |
gr.Text(label="Input Text"),
|
185 |
-
gr.Radio(label="Speaker", choices=[
|
186 |
-
"BDL (male)",
|
187 |
-
"CLB (female)",
|
188 |
-
"KSP (male)",
|
189 |
-
"RMS (male)",
|
190 |
-
"SLT (female)",
|
191 |
-
"Surprise Me!"
|
192 |
-
],
|
193 |
-
value="BDL (male)"),
|
194 |
],
|
195 |
outputs=[
|
196 |
gr.Audio(label="Generated Speech", type="numpy"),
|
|
|
4 |
import torch
|
5 |
|
6 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
from datasets import load_dataset, Audio
|
8 |
|
9 |
dataset = load_dataset(
|
|
|
15 |
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
|
16 |
|
17 |
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
|
18 |
+
model = SpeechT5ForTextToSpeech.from_pretrained("divakaivan/glaswegian_tts")
|
19 |
|
20 |
tokenizer = processor.tokenizer
|
21 |
|
|
|
130 |
return (16000, speech)
|
131 |
|
132 |
|
133 |
+
title = "Glaswegian TTS"
|
134 |
|
135 |
description = """
|
136 |
The <b>SpeechT5</b> model is pre-trained on text as well as speech inputs, with targets that are also a mix of text and speech.
|
|
|
163 |
</div>
|
164 |
"""
|
165 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
|
167 |
gr.Interface(
|
168 |
fn=predict,
|
169 |
inputs=[
|
170 |
gr.Text(label="Input Text"),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
],
|
172 |
outputs=[
|
173 |
gr.Audio(label="Generated Speech", type="numpy"),
|