Pendrokar committed on
Commit
0ba527d
•
1 Parent(s): 60dc387

sync models

Browse files
Files changed (1) hide show
  1. app/models.py +67 -4
app/models.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from gradio_client import handle_file
2
 
3
  # Models to include in the leaderboard, only include models that users can vote on
@@ -48,6 +49,16 @@ AVAILABLE_MODELS = {
48
 
49
  # IMS-Toucan
50
  # 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
 
 
 
 
 
 
 
 
 
 
51
 
52
  # HF TTS w issues
53
  'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
@@ -168,7 +179,7 @@ HF_SPACES = {
168
  'function': '/predict',
169
  'text_param_index': 0,
170
  'return_audio_index': 0,
171
- 'is_proprietary': True,
172
  'series': 'Edge TTS',
173
  },
174
 
@@ -218,6 +229,34 @@ HF_SPACES = {
218
  'is_zero_gpu_space': True,
219
  'series': 'StyleTTS',
220
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  }
222
 
223
  # for zero-shot TTS - voice sample used by XTTS (11 seconds)
@@ -317,8 +356,10 @@ OVERRIDE_INPUTS = {
317
  'mrfakename/E2-F5-TTS': {
318
  0: DEFAULT_VOICE_SAMPLE, # voice sample
319
  1: DEFAULT_VOICE_TRANSCRIPT, # transcript of sample (< 15 seconds required)
320
- 3: "F5-TTS", # model
321
- 4: False, # cleanup silence
 
 
322
  },
323
 
324
  # IMS-Toucan
@@ -337,6 +378,28 @@ OVERRIDE_INPUTS = {
337
  2: 'en-us', # lang
338
  3: 8, # lngsteps
339
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
340
  }
341
 
342
 
@@ -385,7 +448,7 @@ def make_link_to_space(model_name, for_leaderboard=False):
385
  try:
386
  if(
387
  for_leaderboard
388
- and HF_SPACES[model_name]['is_proprietary']
389
  ):
390
  model_basename += ' 🔐'
391
  title += '; 🔐 = online only or proprietary'
 
1
+ import os
2
  from gradio_client import handle_file
3
 
4
  # Models to include in the leaderboard, only include models that users can vote on
 
49
 
50
  # IMS-Toucan
51
  # 'Flux9665/MassivelyMultilingualTTS': 'Flux9665/MassivelyMultilingualTTS', # 5.1
52
+ # StyleTTS v2
53
+ # 'Pendrokar/style-tts-2': 'Pendrokar/style-tts-2', # more votes in OG arena; emotionless
54
+ # StyleTTS kokoro
55
+ 'hexgrad/kokoro': 'hexgrad/kokoro',
56
+
57
+ # MaskGCT (by Amphion)
58
+ # DEMANDS 300 seconds of ZeroGPU
59
+ # 'amphion/maskgct': 'amphion/maskgct',
60
+ # default ZeroGPU borrow time
61
+ 'Svngoku/maskgct-audio-lab': 'Svngoku/maskgct-audio-lab',
62
 
63
  # HF TTS w issues
64
  'LeeSangHoon/HierSpeech_TTS': 'LeeSangHoon/HierSpeech_TTS', # irresponsive to exclamation marks # 4.29
 
179
  'function': '/predict',
180
  'text_param_index': 0,
181
  'return_audio_index': 0,
182
+ 'is_closed_source': True,
183
  'series': 'Edge TTS',
184
  },
185
 
 
229
  'is_zero_gpu_space': True,
230
  'series': 'StyleTTS',
231
  },
232
+
233
+ # StyleTTS v2 kokoro fine tune
234
+ 'hexgrad/kokoro': {
235
+ 'name': 'StyleTTS Kokoro',
236
+ 'function': '/generate',
237
+ 'text_param_index': 0,
238
+ 'return_audio_index': 0,
239
+ 'is_zero_gpu_space': True,
240
+ 'series': 'StyleTTS',
241
+ },
242
+
243
+ # MaskGCT (by Amphion)
244
+ 'amphion/maskgct': {
245
+ 'name': 'MaskGCT',
246
+ 'function': '/predict',
247
+ 'text_param_index': 1,
248
+ 'return_audio_index': 0,
249
+ 'is_zero_gpu_space': True,
250
+ 'series': 'MaskGCT',
251
+ },
252
+ 'Svngoku/maskgct-audio-lab': {
253
+ 'name': 'MaskGCT',
254
+ 'function': '/predict',
255
+ 'text_param_index': 1,
256
+ 'return_audio_index': 0,
257
+ 'is_zero_gpu_space': True,
258
+ 'series': 'MaskGCT',
259
+ },
260
  }
261
 
262
  # for zero-shot TTS - voice sample used by XTTS (11 seconds)
 
356
  'mrfakename/E2-F5-TTS': {
357
  0: DEFAULT_VOICE_SAMPLE, # voice sample
358
  1: DEFAULT_VOICE_TRANSCRIPT, # transcript of sample (< 15 seconds required)
359
+ 3: False, # cleanup silence
360
+ 4: 0.15, #crossfade
361
+ 5: 32, #nfe_slider
362
+ 6: 1, #speed
363
  },
364
 
365
  # IMS-Toucan
 
378
  2: 'en-us', # lang
379
  3: 8, # lngsteps
380
  },
381
+
382
+ # StyleTTS 2 kokoro
383
+ 'hexgrad/kokoro': {
384
+ 1: "af", #voice
385
+ 2: None, #ps
386
+ 3: 1, #speed
387
+ 4: 3000, #trim
388
+ 5: False, #use_gpu; fast enough with multithreaded with CPU
389
+ 6: os.getenv('KOKORO'), #sk
390
+ },
391
+
392
+ # maskGCT (by amphion)
393
+ 'amphion/maskgct': {
394
+ 0: DEFAULT_VOICE_SAMPLE, #prompt_wav
395
+ 2: -1, #target_len
396
+ 3: 25, #n_timesteps
397
+ },
398
+ 'Svngoku/maskgct-audio-lab': {
399
+ 0: DEFAULT_VOICE_SAMPLE, #prompt_wav
400
+ 2: -1, #target_len
401
+ 3: 25, #n_timesteps
402
+ },
403
  }
404
 
405
 
 
448
  try:
449
  if(
450
  for_leaderboard
451
+ and HF_SPACES[model_name]['is_closed_source']
452
  ):
453
  model_basename += ' 🔐'
454
  title += '; 🔐 = online only or proprietary'