Add token authentication from space secrets and initial Piper synthesis code.
Browse files- src/synthesize.py +26 -2
src/synthesize.py
CHANGED
@@ -29,7 +29,7 @@ def synth_mms(text:str, model:str):
|
|
29 |
# raw_response=True)._content
|
30 |
|
31 |
if model is not None:
|
32 |
-
pipe = pipeline("text-to-speech", model=model, device=-1) # Change device if it should use GPU
|
33 |
mms_tts = pipe(text)
|
34 |
return mms_tts['audio'], mms_tts['sampling_rate']
|
35 |
else:
|
@@ -100,7 +100,7 @@ def synth_toucan(text:str, model:str):
|
|
100 |
Returns:
|
101 |
Streaming Wav and sampling rate.
|
102 |
|
103 |
-
|
104 |
'''
|
105 |
client = Client("Flux9665/MassivelyMultilingualTTS")
|
106 |
result = client.predict(
|
@@ -115,4 +115,28 @@ def synth_toucan(text:str, model:str):
|
|
115 |
api_name="/predict"
|
116 |
)
|
117 |
sampling_rate, wav = wavfile.read(result[0])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
return wav, sampling_rate
|
|
|
29 |
# raw_response=True)._content
|
30 |
|
31 |
if model is not None:
|
32 |
+
pipe = pipeline("text-to-speech", model=model, device=-1, token=os.environ['TOKEN']) # Change device if it should use GPU
|
33 |
mms_tts = pipe(text)
|
34 |
return mms_tts['audio'], mms_tts['sampling_rate']
|
35 |
else:
|
|
|
100 |
Returns:
|
101 |
Streaming Wav and sampling rate.
|
102 |
|
103 |
+
NOTES: (1)This wrapper does not let you explore the full range of options possible with the API. (2) The API should allow you to generate female voices, however, it does not seem to be working at the moment. (3) This uses a Huggingface Gradio Space to compute via the API.
|
104 |
'''
|
105 |
client = Client("Flux9665/MassivelyMultilingualTTS")
|
106 |
result = client.predict(
|
|
|
115 |
api_name="/predict"
|
116 |
)
|
117 |
sampling_rate, wav = wavfile.read(result[0])
|
118 |
+
return wav, sampling_rate
|
119 |
+
|
120 |
+
def synth_piper(text:str, model:str):
    '''
    Use Piper (via the k2-fsa text-to-speech Gradio Space) to synthesize text.

    Inputs:
        text: Text to synthesize.
        model: Model code. NOTE(review): despite the `str` annotation, the body
               indexes model[0] (language) and model[1] (repo_id), so callers
               must pass a 2-element sequence such as ("English", "csukuangfj/...")
               — confirm against call sites.
    Returns:
        Streaming Wav (numpy array) and sampling rate.

    NOTES: (1) This uses a Huggingface Gradio Space to compute via the API,
           so it requires network access and is subject to the Space's
           availability and rate limits.
    '''
    # Remote inference on the public k2-fsa Space; sid selects the speaker
    # (single-speaker models use "0") and speed=1 is normal speaking rate.
    client = Client("k2-fsa/text-to-speech")
    result = client.predict(
        language=model[0],
        repo_id=model[1],
        text=text,
        sid="0",
        speed=1,
        api_name="/process"
    )
    # The Space returns a path to a wav file as result[0]; decode it so the
    # caller gets in-memory audio consistent with the other synth_* helpers.
    sampling_rate, wav = wavfile.read(result[0])
    return wav, sampling_rate