khof312 committed on
Commit
5cb01e5
·
1 Parent(s): b4c3706

Add token authentication from space secrets and initial Piper synthesis code.

Files changed (1)
  1. src/synthesize.py +26 -2
src/synthesize.py CHANGED
@@ -29,7 +29,7 @@ def synth_mms(text:str, model:str):
     #            raw_response=True)._content
 
     if model is not None:
-        pipe = pipeline("text-to-speech", model=model, device=-1) # Change device if it should use GPU
+        pipe = pipeline("text-to-speech", model=model, device=-1, token=os.environ['TOKEN']) # Change device if it should use GPU
         mms_tts = pipe(text)
         return mms_tts['audio'], mms_tts['sampling_rate']
     else:
@@ -100,7 +100,7 @@ def synth_toucan(text:str, model:str):
     Returns:
         Streaming Wav and sampling rate.
 
-    NOTE: This wrapper does not let you explore the full range of options possible with the API. The API should allow you to generate female voices, however, it does not seem to be working at the moment.
+    NOTES: (1) This wrapper does not let you explore the full range of options possible with the API. (2) The API should allow you to generate female voices; however, it does not seem to be working at the moment. (3) This uses a Huggingface Gradio Space to compute via the API.
     '''
     client = Client("Flux9665/MassivelyMultilingualTTS")
     result = client.predict(
@@ -115,4 +115,28 @@ def synth_toucan(text:str, model:str):
         api_name="/predict"
     )
     sampling_rate, wav = wavfile.read(result[0])
     return wav, sampling_rate
+
+def synth_piper(text:str, model:str):
+    '''
+    Use Piper to synthesize text.
+
+    Inputs:
+        text: Text to synthesize
+        model: Model code
+    Returns:
+        Streaming Wav and sampling rate.
+
+    NOTES: (1) This uses a Huggingface Gradio Space to compute via the API.
+    '''
+    client = Client("k2-fsa/text-to-speech")
+    result = client.predict(
+        language=model[0],
+        repo_id=model[1],
+        text=text,
+        sid="0",
+        speed=1,
+        api_name="/process"
+    )
+    sampling_rate, wav = wavfile.read(result[0])
+    return wav, sampling_rate
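
For reference, a minimal usage sketch of the new wrapper (not part of the commit). It assumes synthesize.py is importable as src.synthesize and uses a placeholder (language, repo_id) model code; the token change itself only affects synth_mms and relies on a Space secret named TOKEN (Space secrets are exposed to the app as environment variables) plus an `import os` in synthesize.py.

# Usage sketch with assumed import path and placeholder values.
import os
from scipy.io import wavfile

from src.synthesize import synth_piper  # assumed import path based on src/synthesize.py

# The TOKEN secret configured in the Space settings becomes an environment
# variable; set a placeholder locally so synth_mms can read os.environ['TOKEN'].
os.environ.setdefault("TOKEN", "hf_xxx")  # placeholder token for local testing

wav, sampling_rate = synth_piper(
    "Hello from Piper.",
    ("English", "<piper-repo-id>"),  # hypothetical model code: (language, repo_id) pair served by the k2-fsa/text-to-speech Space
)
wavfile.write("piper_sample.wav", sampling_rate, wav)  # save the returned audio array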