TTS-Spaces-Arena

Running

App Files Files Community

TTS-Spaces-Arena / app /synth.py

Pendrokar

top_five by votes

f1c2985 23 days ago

raw

history blame

16 kB

	import time
	from .models import *
	from .utils import *
	from .config import *
	from .init import *
	from .sample_caching import *

	import gradio as gr
	from pydub import AudioSegment
	import random, os, threading, tempfile
	from langdetect import detect
	from .vote import log_text

	# Models that are always scrutinized: once this list holds five entries, one
	# of them is forced into every random match-up built by synthandreturn().
	top_five = ['fishaudio/fish-speech-1'] # fish 1.5
	hf_token=os.getenv('HF_TOKEN')

	# prioritize low vote models: fill the remaining slots with the models that
	# have the fewest total votes (fewest first)
	sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
	conn = get_db()
	cursor = conn.cursor()
	cursor.execute(sql)
	data = cursor.fetchall()
	for model in data:
	    if len(top_five) >= 5:
	        break

	    # Only take models that are currently available, and never add a model
	    # that is already listed (the seeded entry may itself be a low-vote
	    # model) - a duplicate would waste a slot and skew the forced pick.
	    if model[0] in AVAILABLE_MODELS and model[0] not in top_five:
	        top_five.append(model[0])
	print(f"low vote top_five: {top_five}")

	def random_m():
	    """Return a list with two distinct, randomly chosen keys of AVAILABLE_MODELS."""
	    candidates = list(set(AVAILABLE_MODELS.keys()))
	    return random.sample(candidates, k=2)

	def check_toxicity(text):
	    """Tell whether ``text`` scores above 0.8 on the toxicity model.

	    Always returns False when TOXICITY_CHECK is disabled; the model is not
	    consulted in that case.
	    """
	    if TOXICITY_CHECK:
	        score = toxicity.predict(text)['toxicity']
	        return score > 0.8
	    return False

	def synthandreturn(text, autoplay, request: gr.Request):
	    """Synthesize ``text`` with two randomly picked models and build the UI update.

	    The text is validated (non-empty, length limits, toxicity for text that is
	    not one of the prepared sentences in ``sents``), two different models are
	    chosen, each is asked for audio, and the audio is post-processed
	    (resampled to at most 24 kHz and normalized when possible).

	    Args:
	        text: Text entered by the user; surrounding whitespace is stripped.
	        autoplay: Passed as the ``autoplay`` setting of the first audio player.
	        request: Gradio request; its ``x-ip-token`` header is forwarded to
	            Spaces flagged as ``is_zero_gpu_space``.

	    Returns:
	        A 14-tuple of values/updates for the arena UI components, in the
	        order marked by the inline comments of the return statement.

	    Raises:
	        gr.Error: If the text is rejected or a model did not deliver audio.
	    """
	    text = text.strip()
	    # Reject empty input first; after the length check this was unreachable
	    # whenever MIN_SAMPLE_TXT_LENGTH is greater than zero.
	    if not text:
	        raise gr.Error('You did not enter any text')
	    if len(text) > MAX_SAMPLE_TXT_LENGTH:
	        raise gr.Error(f'You exceeded the limit of {MAX_SAMPLE_TXT_LENGTH} characters')
	    if len(text) < MIN_SAMPLE_TXT_LENGTH:
	        raise gr.Error(f'Please input a text longer than {MIN_SAMPLE_TXT_LENGTH} characters')
	    if (
	        # test toxicity if not prepared text
	        text not in sents
	        and check_toxicity(text)
	    ):
	        print(f'Detected toxic content! "{text}"')
	        raise gr.Error('Your text failed the toxicity test')
	    # Check language (best effort: a detection failure must not block synthesis)
	    try:
	        if (
	            text not in sents
	            and not detect(text) == "en"
	        ):
	            gr.Warning('Warning: The input text may not be in English')
	    except Exception:
	        pass

	    # Get two random models

	    # forced model: your TTS model versus The World!!!
	    # mdl1 = 'Pendrokar/xVASynth'

	    # scrutinize the top five by always picking one of them
	    if len(top_five) >= 5:
	        mdl1 = random.sample(top_five, 1)[0]
	        # every available model except the forced one; built by filtering so
	        # a forced model missing from AVAILABLE_MODELS cannot raise KeyError
	        opponents = [name for name in AVAILABLE_MODELS if name != mdl1]
	        mdl2 = random.sample(opponents, 1)[0]
	        # randomize position of the forced model
	        mdl1, mdl2 = random.sample([mdl1, mdl2], 2)
	    else:
	        # actual random
	        mdl1, mdl2 = random.sample(list(AVAILABLE_MODELS.keys()), 2)

	    print("[debug] Using", mdl1, mdl2)

	    def predict_and_update_result(text, model, result_storage, request:gr.Request):
	        """Fetch audio for ``text`` from ``model`` and store its path in ``result_storage``."""
	        hf_headers = {}
	        # best effort: the Space may have no 'is_zero_gpu_space' entry and the
	        # request may carry no 'x-ip-token' header
	        try:
	            if HF_SPACES[model]['is_zero_gpu_space']:
	                hf_headers = {"X-IP-Token": request.headers['x-ip-token']}
	        except Exception:
	            pass

	        # re-attempt if necessary
	        attempt_count = 0
	        max_attempts = 1 # 3 =May cause 429 Too Many Request
	        while attempt_count < max_attempts:
	            try:
	                if model in AVAILABLE_MODELS:
	                    if '/' in model:
	                        # Use public HF Space
	                        mdl_space = Client(model, hf_token=hf_token, headers=hf_headers)

	                        # position of the audio within the returned values
	                        return_audio_index = int(HF_SPACES[model]['return_audio_index'])
	                        endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')

	                        api_name = None
	                        fn_index = None
	                        end_parameters = None
	                        # has named endpoint
	                        if '/' == HF_SPACES[model]['function'][0]:
	                            # audio sync function name
	                            api_name = HF_SPACES[model]['function']

	                            end_parameters = _get_param_examples(
	                                endpoints['named_endpoints'][api_name]['parameters']
	                            )
	                        # has unnamed endpoint
	                        else:
	                            # the configured function is the endpoint index
	                            fn_index = int(HF_SPACES[model]['function'])

	                            end_parameters = _get_param_examples(
	                                endpoints['unnamed_endpoints'][str(fn_index)]['parameters']
	                            )

	                        # override some or all default parameters
	                        space_inputs = _override_params(end_parameters, model)

	                        # force text
	                        space_inputs[HF_SPACES[model]['text_param_index']] = text

	                        print(f"{model}: Sending request to HF Space")
	                        results = mdl_space.predict(*space_inputs, api_name=api_name, fn_index=fn_index)

	                        # return path to audio
	                        result = results
	                        if not isinstance(results, str):
	                            # the audio is one of several returned values
	                            result = results[return_audio_index]
	                        if isinstance(result, dict):
	                            # the audio value is wrapped in a dictionary
	                            result = results[return_audio_index]['value']
	                    else:
	                        # Use the private HF Space
	                        result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
	                else:
	                    result = router.predict(text, model.lower(), api_name="/synthesize")
	                break
	            except Exception as e:
	                attempt_count += 1
	                # Retrying is currently disabled: fail immediately and report
	                # the underlying error together with the model name.
	                raise gr.Error(f"{model}:"+ repr(e)) from e

	        if attempt_count >= max_attempts:
	            raise gr.Error(f"{model}: Failed to call model")
	        else:
	            print('Done with', model)

	        # Post-processing is best effort; on failure the unprocessed file is used.
	        try:
	            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
	                audio = AudioSegment.from_file(result)
	                current_sr = audio.frame_rate
	                if current_sr > 24000:
	                    print(f"{model}: Resampling")
	                    audio = audio.set_frame_rate(24000)
	                try:
	                    print(f"{model}: Trying to normalize audio")
	                    audio = match_target_amplitude(audio, -20)
	                except Exception:
	                    print(f"{model}: [WARN] Unable to normalize audio")
	                audio.export(f.name, format="wav")
	                # the processed copy replaces the file delivered by the model
	                os.unlink(result)
	                result = f.name
	                gr.Info('Audio from a TTS model received')
	        except Exception:
	            print(f"{model}: [WARN] Unable to resample audio")
	        if model in AVAILABLE_MODELS.keys(): model = AVAILABLE_MODELS[model]
	        result_storage[model] = result

	    def _get_param_examples(parameters):
	        """Collect the example input of each endpoint parameter, cast by its type.

	        Parameters that are neither a Radio/Dropdown/Audio component nor of
	        python type str/int/float/bool are left out.
	        """
	        example_inputs = []
	        for param_info in parameters:
	            if (
	                param_info['component'] in ('Radio', 'Dropdown', 'Audio')
	                or param_info['python_type']['type'] == 'str'
	            ):
	                example_inputs.append(str(param_info['example_input']))
	                continue
	            if param_info['python_type']['type'] == 'int':
	                example_inputs.append(int(param_info['example_input']))
	                continue
	            if param_info['python_type']['type'] == 'float':
	                example_inputs.append(float(param_info['example_input']))
	                continue
	            if param_info['python_type']['type'] == 'bool':
	                example_inputs.append(bool(param_info['example_input']))
	                continue

	        return example_inputs

	    def _override_params(inputs, modelname):
	        """Apply the OVERRIDE_INPUTS entries of ``modelname`` to ``inputs`` in place."""
	        # best effort: a model without overrides (or a bad index) keeps its defaults
	        try:
	            for key,value in OVERRIDE_INPUTS[modelname].items():
	                inputs[key] = value
	            print(f"{modelname}: Default inputs overridden by Arena")
	        except Exception:
	            pass

	        return inputs

	    def _cache_sample(text, model):
	        """Remember the audio of ``model`` for ``text`` if it is a prepared sentence.

	        Returns False when nothing was cached (free text or failure), True
	        when a matching sample was already cached, None after caching.
	        """
	        # skip caching if not hardcoded sentence
	        if text not in sents:
	            return False

	        # check if already cached
	        for cached_sample in cached_samples:
	            # TODO:replace cached with newer version
	            if cached_sample.transcript == text and cached_sample.modelName == model:
	                return True

	        # results[model] is missing when the model delivered no audio
	        try:
	            cached_samples.append(Sample(results[model], text, model))
	        except Exception:
	            print('Error when trying to cache sample')
	            return False

	    mdl1k = mdl1
	    mdl2k = mdl2
	    print(mdl1k, mdl2k)
	    if mdl1 in AVAILABLE_MODELS.keys(): mdl1k=AVAILABLE_MODELS[mdl1]
	    if mdl2 in AVAILABLE_MODELS.keys(): mdl2k=AVAILABLE_MODELS[mdl2]
	    results = {}
	    print(f"Sending models {mdl1k} and {mdl2k} to API")

	    # do not use multithreading when both spaces are ZeroGPU type
	    if (
	        # exists
	        'is_zero_gpu_space' in HF_SPACES[mdl1]
	        # is True
	        and HF_SPACES[mdl1]['is_zero_gpu_space']
	        and 'is_zero_gpu_space' in HF_SPACES[mdl2]
	        and HF_SPACES[mdl2]['is_zero_gpu_space']
	    ):
	        # run Zero-GPU spaces one at a time
	        predict_and_update_result(text, mdl1k, results, request)
	        _cache_sample(text, mdl1k)

	        predict_and_update_result(text, mdl2k, results, request)
	        _cache_sample(text, mdl2k)
	    else:
	        # use multithreading
	        thread1 = threading.Thread(target=predict_and_update_result, args=(text, mdl1k, results, request))
	        thread2 = threading.Thread(target=predict_and_update_result, args=(text, mdl2k, results, request))

	        thread1.start()
	        # wait 3 seconds to calm hf.space domain
	        time.sleep(3)
	        thread2.start()
	        # timeout in 2 minutes
	        thread1.join(120)
	        thread2.join(120)

	        # cache the result
	        for model in [mdl1k, mdl2k]:
	            _cache_sample(text, model)

	    # An exception raised inside a worker thread does not reach this thread,
	    # and a join may time out; report a missing result as a UI error instead
	    # of failing with a KeyError while building the return value.
	    for model in (mdl1k, mdl2k):
	        if model not in results:
	            raise gr.Error(f"{model}: Failed to call model")

	    print(f"Retrieving models {mdl1k} and {mdl2k} from API")
	    return (
	        text,
	        "Synthesize",
	        gr.update(visible=True), # r2
	        mdl1, # model1
	        mdl2, # model2
	        gr.update(visible=True, value=results[mdl1k], autoplay=autoplay), # aud1
	        gr.update(visible=True, value=results[mdl2k], autoplay=False), # aud2
	        gr.update(visible=True, interactive=False), #abetter
	        gr.update(visible=True, interactive=False), #bbetter
	        gr.update(visible=False), #prevmodel1
	        gr.update(visible=False), #prevmodel2
	        gr.update(visible=False), #nxt round btn
	        # reset gr.State aplayed & bplayed
	        False, #aplayed
	        False, #bplayed
	    )

	# Battle Mode

	def synthandreturn_battle(text, mdl1, mdl2, autoplay):
	    """Synthesize ``text`` with the two models picked by the user (battle mode).

	    Args:
	        text: Text entered by the user; surrounding whitespace is stripped.
	        mdl1: First model; must differ from ``mdl2``.
	        mdl2: Second model.
	        autoplay: Passed as the ``autoplay`` setting of the first audio player.

	    Returns:
	        A 12-tuple of values/updates for the battle UI components, in the
	        order marked by the inline comments of the return statement.

	    Raises:
	        gr.Error: If both models are equal, the text is rejected, or a model
	            did not deliver audio.
	    """
	    if mdl1 == mdl2:
	        raise gr.Error('You can\'t pick two of the same models.')
	    text = text.strip()
	    # Reject empty input first; after the length check this was unreachable
	    # whenever MIN_SAMPLE_TXT_LENGTH is greater than zero.
	    if not text:
	        raise gr.Error('You did not enter any text')
	    if len(text) > MAX_SAMPLE_TXT_LENGTH:
	        raise gr.Error(f'You exceeded the limit of {MAX_SAMPLE_TXT_LENGTH} characters')
	    if len(text) < MIN_SAMPLE_TXT_LENGTH:
	        raise gr.Error(f'Please input a text longer than {MIN_SAMPLE_TXT_LENGTH} characters')
	    if (
	        # test toxicity if not prepared text
	        text not in sents
	        and check_toxicity(text)
	    ):
	        print(f'Detected toxic content! "{text}"')
	        raise gr.Error('Your text failed the toxicity test')
	    # Check language (best effort: a detection failure must not block synthesis)
	    try:
	        if not detect(text) == "en":
	            gr.Warning('Warning: The input text may not be in English')
	    except Exception:
	        pass
	    # the models are chosen by the caller; record the text and proceed
	    log_text(text)
	    print("[debug] Using", mdl1, mdl2)

	    def predict_and_update_result(text, model, result_storage):
	        """Fetch audio for ``text`` from ``model`` and store its path in ``result_storage``."""
	        try:
	            if model in AVAILABLE_MODELS:
	                result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
	            else:
	                result = router.predict(text, model.lower(), api_name="/synthesize")
	        except Exception as e:
	            raise gr.Error('Unable to call API, please try again :)') from e
	        print('Done with', model)
	        # Post-processing is best effort; on failure the unprocessed file is used.
	        try:
	            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
	                audio = AudioSegment.from_file(result)
	                current_sr = audio.frame_rate
	                if current_sr > 24000:
	                    audio = audio.set_frame_rate(24000)
	                try:
	                    print('Trying to normalize audio')
	                    audio = match_target_amplitude(audio, -20)
	                except Exception:
	                    print('[WARN] Unable to normalize audio')
	                audio.export(f.name, format="wav")
	                # the processed copy replaces the file delivered by the model
	                os.unlink(result)
	                result = f.name
	        except Exception:
	            pass
	        if model in AVAILABLE_MODELS.keys(): model = AVAILABLE_MODELS[model]
	        print(model)
	        print(f"Running model {model}")
	        result_storage[model] = result

	    mdl1k = mdl1
	    mdl2k = mdl2
	    print(mdl1k, mdl2k)
	    if mdl1 in AVAILABLE_MODELS.keys(): mdl1k=AVAILABLE_MODELS[mdl1]
	    if mdl2 in AVAILABLE_MODELS.keys(): mdl2k=AVAILABLE_MODELS[mdl2]
	    results = {}
	    print(f"Sending models {mdl1k} and {mdl2k} to API")
	    thread1 = threading.Thread(target=predict_and_update_result, args=(text, mdl1k, results))
	    thread2 = threading.Thread(target=predict_and_update_result, args=(text, mdl2k, results))

	    thread1.start()
	    thread2.start()
	    thread1.join()
	    thread2.join()

	    # An exception raised inside a worker thread does not reach this thread;
	    # report a missing result as a UI error instead of failing with a
	    # KeyError while building the return value.
	    for model in (mdl1k, mdl2k):
	        if model not in results:
	            raise gr.Error('Unable to call API, please try again :)')

	    print(f"Retrieving models {mdl1k} and {mdl2k} from API")
	    return (
	        text,
	        "Synthesize",
	        gr.update(visible=True), # r2
	        mdl1, # model1
	        mdl2, # model2
	        gr.update(visible=True, value=results[mdl1k], autoplay=autoplay), # aud1
	        gr.update(visible=True, value=results[mdl2k], autoplay=False), # aud2
	        gr.update(visible=True, interactive=False), #abetter
	        gr.update(visible=True, interactive=False), #bbetter
	        gr.update(visible=False), #prevmodel1
	        gr.update(visible=False), #prevmodel2
	        gr.update(visible=False), #nxt round btn
	    )

	def randomsent():
	    """Return a 3-tuple: '⚡', a random sentence from ``sents``, and '🎲'."""
	    sentence = random.choice(sents)
	    return ('⚡', sentence, '🎲')
	def randomsent_battle():
	    """Return the randomsent() values followed by the two models from random_m()."""
	    return (*randomsent(), *random_m())
	def clear_stuff():
	    """Build the list of values/updates that puts the UI back into its initial state.

	    The text input is shown and emptied, the button label is "Synthesize",
	    both model names are blank, every other component is hidden, and both
	    "played" flags are False.
	    """
	    text_input = gr.update(visible=True, value="", elem_classes=[])
	    result_row = gr.update(visible=False)
	    # a separate update object per component, as each call creates its own
	    audio_players = [
	        gr.update(visible=False, interactive=False, autoplay=False)
	        for _ in range(2)
	    ]
	    vote_buttons = [gr.update(visible=False, interactive=False) for _ in range(2)]
	    trailing_hidden = [gr.update(visible=False) for _ in range(3)]

	    return [
	        text_input,
	        "Synthesize",
	        result_row,        # r2
	        '',                # model1
	        '',                # model2
	        *audio_players,    # aud1, aud2
	        *vote_buttons,     # abetter, bbetter
	        *trailing_hidden,  # prevmodel1, prevmodel2, nxt round btn
	        False,             # aplayed
	        False,             # bplayed
	    ]