glaswegian-tts-demo

Runtime error

App Files Community

divakaivan committed on May 18, 2024

Commit

6be28a0

verified ·

1 Parent(s): 6742dfa

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -25

app.py CHANGED Viewed

@@ -4,12 +4,6 @@ import numpy as np
 import torch
 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
-#.
-checkpoint = "microsoft/speecht5_tts"
-processor = SpeechT5Processor.from_pretrained(checkpoint)
-model = SpeechT5ForTextToSpeech.from_pretrained("divakaivan/glaswegian_tts")
 from datasets import load_dataset, Audio
 dataset = load_dataset(
@@ -21,7 +15,7 @@ dataset = dataset.cast_column("audio", Audio(sampling_rate=16000))['train']
 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
 processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
-model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
 tokenizer = processor.tokenizer
@@ -136,7 +130,7 @@ def predict(text, speaker):
     return (16000, speech)
-title = "SpeechT5: Speech Synthesis"
 description = """
 The <b>SpeechT5</b> model is pre-trained on text as well as speech inputs, with targets that are also a mix of text and speech.
@@ -169,28 +163,11 @@ article = """
 </div>
 """
-examples = [
-    ["It is not in the stars to hold our destiny but in ourselves.", "BDL (male)"],
-    ["The octopus and Oliver went to the opera in October.", "CLB (female)"],
-    ["She sells seashells by the seashore. I saw a kitten eating chicken in the kitchen.", "RMS (male)"],
-    ["Brisk brave brigadiers brandished broad bright blades, blunderbusses, and bludgeons—balancing them badly.", "SLT (female)"],
-    ["A synonym for cinnamon is a cinnamon synonym.", "BDL (male)"],
-    ["How much wood would a woodchuck chuck if a woodchuck could chuck wood? He would chuck, he would, as much as he could, and chuck as much wood as a woodchuck would if a woodchuck could chuck wood.", "CLB (female)"],
-]
 gr.Interface(
     fn=predict,
     inputs=[
         gr.Text(label="Input Text"),
-        gr.Radio(label="Speaker", choices=[
-            "BDL (male)",
-            "CLB (female)",
-            "KSP (male)",
-            "RMS (male)",
-            "SLT (female)",
-            "Surprise Me!"
-        ],
-        value="BDL (male)"),
     ],
     outputs=[
         gr.Audio(label="Generated Speech", type="numpy"),

 import torch
 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
 from datasets import load_dataset, Audio
 dataset = load_dataset(
 from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech
 processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
+model = SpeechT5ForTextToSpeech.from_pretrained("divakaivan/glaswegian_tts")
 tokenizer = processor.tokenizer
     return (16000, speech)
+title = "Glaswegian TTS"
 description = """
 The <b>SpeechT5</b> model is pre-trained on text as well as speech inputs, with targets that are also a mix of text and speech.
 </div>
 """
 gr.Interface(
     fn=predict,
     inputs=[
         gr.Text(label="Input Text"),
     ],
     outputs=[
         gr.Audio(label="Generated Speech", type="numpy"),