Pendrokar committed on
Commit
75485ec
1 Parent(s): 2dfe855

F5 TTS API nfe_slider param fix

Browse files
Files changed (2) hide show
  1. app.py +7 -6
  2. test_tts_e2_f5_f5.py +1 -0
app.py CHANGED
@@ -427,7 +427,8 @@ OVERRIDE_INPUTS = {
427
  1: DEFAULT_VOICE_TRANSCRIPT, # transcript of sample (< 15 seconds required)
428
  3: False, # cleanup silence
429
  4: 0.15, #crossfade
430
- 5: 1, #speed
 
431
  },
432
 
433
  # IMS-Toucan
@@ -1135,7 +1136,7 @@ def synthandreturn(text, request: gr.Request):
1135
  # assume the index is one of the first 9 return params
1136
  return_audio_index = int(HF_SPACES[model]['return_audio_index'])
1137
  endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')
1138
-
1139
  api_name = None
1140
  fn_index = None
1141
  end_parameters = None
@@ -1143,7 +1144,7 @@ def synthandreturn(text, request: gr.Request):
1143
  if '/' == HF_SPACES[model]['function'][0]:
1144
  # audio sync function name
1145
  api_name = HF_SPACES[model]['function']
1146
-
1147
  end_parameters = _get_param_examples(
1148
  endpoints['named_endpoints'][api_name]['parameters']
1149
  )
@@ -1151,7 +1152,7 @@ def synthandreturn(text, request: gr.Request):
1151
  else:
1152
  # endpoint index is the first character
1153
  fn_index = int(HF_SPACES[model]['function'])
1154
-
1155
  end_parameters = _get_param_examples(
1156
  endpoints['unnamed_endpoints'][str(fn_index)]['parameters']
1157
  )
@@ -1248,7 +1249,7 @@ def synthandreturn(text, request: gr.Request):
1248
  pass
1249
 
1250
  return inputs
1251
-
1252
  def _cache_sample(text, model):
1253
  # skip caching if not hardcoded sentence
1254
  if (text not in sents):
@@ -1310,7 +1311,7 @@ def synthandreturn(text, request: gr.Request):
1310
  # cache the result
1311
  for model in [mdl1k, mdl2k]:
1312
  _cache_sample(text, model)
1313
-
1314
  #debug
1315
  # print(results)
1316
  # print(list(results.keys())[0])
 
427
  1: DEFAULT_VOICE_TRANSCRIPT, # transcript of sample (< 15 seconds required)
428
  3: False, # cleanup silence
429
  4: 0.15, #crossfade
430
+ 5: 32, #nfe_slider
431
+ 6: 1, #speed
432
  },
433
 
434
  # IMS-Toucan
 
1136
  # assume the index is one of the first 9 return params
1137
  return_audio_index = int(HF_SPACES[model]['return_audio_index'])
1138
  endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')
1139
+
1140
  api_name = None
1141
  fn_index = None
1142
  end_parameters = None
 
1144
  if '/' == HF_SPACES[model]['function'][0]:
1145
  # audio sync function name
1146
  api_name = HF_SPACES[model]['function']
1147
+
1148
  end_parameters = _get_param_examples(
1149
  endpoints['named_endpoints'][api_name]['parameters']
1150
  )
 
1152
  else:
1153
  # endpoint index is the first character
1154
  fn_index = int(HF_SPACES[model]['function'])
1155
+
1156
  end_parameters = _get_param_examples(
1157
  endpoints['unnamed_endpoints'][str(fn_index)]['parameters']
1158
  )
 
1249
  pass
1250
 
1251
  return inputs
1252
+
1253
  def _cache_sample(text, model):
1254
  # skip caching if not hardcoded sentence
1255
  if (text not in sents):
 
1311
  # cache the result
1312
  for model in [mdl1k, mdl2k]:
1313
  _cache_sample(text, model)
1314
+
1315
  #debug
1316
  # print(results)
1317
  # print(list(results.keys())[0])
test_tts_e2_f5_f5.py CHANGED
@@ -9,6 +9,7 @@ result = client.predict(
9
  gen_text_input="Please surprise me and speak in whatever voice you enjoy.",
10
  remove_silence=False,
11
  cross_fade_duration_slider=0.15,
 
12
  speed_slider=1,
13
  api_name="/basic_tts",
14
  )
 
9
  gen_text_input="Please surprise me and speak in whatever voice you enjoy.",
10
  remove_silence=False,
11
  cross_fade_duration_slider=0.15,
12
+ nfe_slider=32,
13
  speed_slider=1,
14
  api_name="/basic_tts",
15
  )