File size: 16,040 Bytes
66182b2
d40e945
 
 
 
cc1e3ba
d40e945
 
 
 
 
 
 
f1c2985
 
66182b2
 
f1c2985
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d40e945
 
 
 
 
 
 
 
f968ac4
d40e945
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66182b2
 
 
 
d40e945
 
 
 
66182b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d40e945
66182b2
 
 
d40e945
66182b2
 
d40e945
66182b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d40e945
 
 
 
 
66182b2
d40e945
 
66182b2
d40e945
 
66182b2
d40e945
 
 
66182b2
d40e945
66182b2
d40e945
 
 
66182b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cc1e3ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d40e945
 
 
 
 
 
 
66182b2
 
 
 
 
 
 
 
 
 
 
 
cc1e3ba
66182b2
 
cc1e3ba
66182b2
 
 
 
 
 
 
 
 
 
 
 
 
 
cc1e3ba
 
66182b2
d40e945
 
 
 
 
 
 
f968ac4
 
d40e945
 
 
 
 
5a35ee6
 
 
d40e945
 
 
 
f968ac4
d40e945
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66182b2
d40e945
 
 
 
 
 
 
 
 
 
 
 
f968ac4
 
d40e945
 
 
 
 
 
 
 
cc1e3ba
d40e945
 
 
5a35ee6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
import time
from .models import *
from .utils import *
from .config import *
from .init import *
from .sample_caching import *

import gradio as gr
from pydub import AudioSegment
import random, os, threading, tempfile
from langdetect import detect
from .vote import log_text

# Models that are always scrutinized: once this list holds five names, every
# random match-up in synthandreturn includes one of them.
top_five = ['fishaudio/fish-speech-1'] # fish 1.5
hf_token=os.getenv('HF_TOKEN')

# Fill the remaining slots with the least-voted models (fewer than 750 votes
# in total), fewest votes first.
sql = 'SELECT name FROM model WHERE (upvote + downvote) < 750 ORDER BY (upvote + downvote) ASC'
conn = get_db()
cursor = conn.cursor()
cursor.execute(sql)
data = cursor.fetchall()
for model in data:
    if len(top_five) >= 5:
        break

    # Skip names that are not currently offered, and names already listed:
    # the hard-coded entry above may also come back from the query, and a
    # duplicate would occupy a slot while being sampled twice as often.
    if model[0] in AVAILABLE_MODELS.keys() and model[0] not in top_five:
        top_five.append(model[0])
print(f"low vote top_five: {top_five}")

def random_m():
    """Return a list of two distinct model keys drawn at random from AVAILABLE_MODELS."""
    unique_models = set(AVAILABLE_MODELS.keys())
    candidates = list(unique_models)
    return random.sample(candidates, 2)

def check_toxicity(text):
    """Return True when the toxicity score predicted for ``text`` exceeds 0.8.

    Always returns False when TOXICITY_CHECK is disabled; the predictor is
    not called in that case.
    """
    if TOXICITY_CHECK:
        prediction = toxicity.predict(text)
        return prediction['toxicity'] > 0.8
    return False

def synthandreturn(text, autoplay, request: gr.Request):
    """Synthesize ``text`` with two models and return the updates for the vote UI.

    The text is validated (non-empty, length limits, toxicity for texts that
    are not one of the prepared ``sents``), two models are chosen, both are
    asked to synthesize the text, and the resulting audio files are
    resampled/normalized on a best-effort basis.

    Args:
        text: Text to synthesize; surrounding whitespace is stripped.
        autoplay: Used as the ``autoplay`` value of the first audio update.
        request: Gradio request; its ``x-ip-token`` header is forwarded to
            Spaces flagged ``is_zero_gpu_space``.

    Returns:
        A 14-tuple: the stripped text, the label "Synthesize", and the
        values/updates annotated inline in the return statement.

    Raises:
        gr.Error: If validation fails or a model does not deliver audio.
    """
    text = text.strip()
    # Checked first so that empty input gets the dedicated message instead of
    # the generic "longer than N characters" one.
    if not text:
        raise gr.Error('You did not enter any text')
    if len(text) > MAX_SAMPLE_TXT_LENGTH:
        raise gr.Error(f'You exceeded the limit of {MAX_SAMPLE_TXT_LENGTH} characters')
    if len(text) < MIN_SAMPLE_TXT_LENGTH:
        raise gr.Error(f'Please input a text longer than {MIN_SAMPLE_TXT_LENGTH} characters')

    # Prepared sentences skip the toxicity test and the language check.
    is_prepared_text = text in sents
    if not is_prepared_text and check_toxicity(text):
        print(f'Detected toxic content! "{text}"')
        raise gr.Error('Your text failed the toxicity test')
    if not is_prepared_text:
        # Language detection is advisory only; any failure is ignored.
        try:
            if detect(text) != "en":
                gr.Warning('Warning: The input text may not be in English')
        except Exception:
            pass

    # forced model: your TTS model versus The World!!!
    # mdl1 = 'Pendrokar/xVASynth'

    # scrutinize the top five by always picking one of them
    if len(top_five) >= 5:
        forced_model = random.sample(top_five, 1)[0]
        vsModels = dict(AVAILABLE_MODELS)
        # pop() instead of del: a top_five entry may be absent from
        # AVAILABLE_MODELS, which must not crash the request.
        vsModels.pop(forced_model, None)
        opponent = random.sample(list(vsModels.keys()), 1)[0]
        # randomize position of the forced model
        mdl1, mdl2 = random.sample([forced_model, opponent], 2)
    else:
        # actual random
        mdl1, mdl2 = random.sample(list(AVAILABLE_MODELS.keys()), 2)

    print("[debug] Using", mdl1, mdl2)

    def _get_param_examples(parameters):
        """Build positional default inputs from the endpoint's example values.

        NOTE(review): parameters whose type is none of str/int/float/bool (and
        whose component is not Radio/Dropdown/Audio) are skipped, which shifts
        the positions of later inputs - confirm this is what the index-based
        HF_SPACES / OVERRIDE_INPUTS configuration expects.
        """
        example_inputs = []
        for param_info in parameters:
            if (
                param_info['component'] in ('Radio', 'Dropdown', 'Audio')
                or param_info['python_type']['type'] == 'str'
            ):
                example_inputs.append(str(param_info['example_input']))
                continue
            param_type = param_info['python_type']['type']
            if param_type == 'int':
                example_inputs.append(int(param_info['example_input']))
            elif param_type == 'float':
                example_inputs.append(float(param_info['example_input']))
            elif param_type == 'bool':
                example_inputs.append(bool(param_info['example_input']))

        return example_inputs

    def _override_params(inputs, modelname):
        """Apply the per-model OVERRIDE_INPUTS entries to ``inputs`` in place."""
        try:
            for key, value in OVERRIDE_INPUTS[modelname].items():
                inputs[key] = value
            print(f"{modelname}: Default inputs overridden by Arena")
        except Exception:
            # No overrides configured for this model (or an entry that does
            # not fit the inputs); keep whatever was applied so far.
            pass

        return inputs

    def _call_model(text, model, hf_headers):
        """Request the synthesis from the model and return the raw result."""
        if model not in AVAILABLE_MODELS:
            return router.predict(text, model.lower(), api_name="/synthesize")
        if '/' not in model:
            # Use the private HF Space
            return router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")

        # Use public HF Space
        mdl_space = Client(model, hf_token=hf_token, headers=hf_headers)

        # position of the audio within the values returned by the Space
        return_audio_index = int(HF_SPACES[model]['return_audio_index'])
        endpoints = mdl_space.view_api(all_endpoints=True, print_info=False, return_format='dict')

        api_name = None
        fn_index = None
        if '/' == HF_SPACES[model]['function'][0]:
            # named endpoint
            api_name = HF_SPACES[model]['function']
            end_parameters = _get_param_examples(
                endpoints['named_endpoints'][api_name]['parameters']
            )
        else:
            # unnamed endpoint, addressed by its index
            fn_index = int(HF_SPACES[model]['function'])
            end_parameters = _get_param_examples(
                endpoints['unnamed_endpoints'][str(fn_index)]['parameters']
            )

        # override some or all default parameters
        space_inputs = _override_params(end_parameters, model)

        # force text
        space_inputs[HF_SPACES[model]['text_param_index']] = text

        print(f"{model}: Sending request to HF Space")
        outputs = mdl_space.predict(*space_inputs, api_name=api_name, fn_index=fn_index)

        # A plain string is returned as is; otherwise pick the audio entry.
        result = outputs
        if not isinstance(outputs, str):
            result = outputs[return_audio_index]
        if isinstance(result, dict):
            # the audio entry is a dictionary holding the value under 'value'
            result = result['value']
        return result

    def predict_and_update_result(text, model, result_storage, request:gr.Request):
        """Synthesize ``text`` with ``model`` and store the result in ``result_storage``."""
        hf_headers = {}
        try:
            if HF_SPACES[model]['is_zero_gpu_space']:
                hf_headers = {"X-IP-Token": request.headers['x-ip-token']}
        except Exception:
            # No Space entry, no flag or no token header: send no extra headers.
            pass

        # Single attempt only: re-attempting may cause 429 Too Many Requests.
        try:
            result = _call_model(text, model, hf_headers)
        except Exception as e:
            raise gr.Error(f"{model}:"+ repr(e)) from e
        print('Done with', model)

        # Best-effort post-processing; on failure the result is kept unchanged.
        try:
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
                audio = AudioSegment.from_file(result)
                current_sr = audio.frame_rate
                if current_sr > 24000:
                    print(f"{model}: Resampling")
                    audio = audio.set_frame_rate(24000)
                try:
                    print(f"{model}: Trying to normalize audio")
                    audio = match_target_amplitude(audio, -20)
                except Exception:
                    print(f"{model}: [WARN] Unable to normalize audio")
                audio.export(f.name, format="wav")
                os.unlink(result)
                result = f.name
                gr.Info('Audio from a TTS model received')
        except Exception:
            print(f"{model}: [WARN] Unable to resample audio")
        if model in AVAILABLE_MODELS.keys(): model = AVAILABLE_MODELS[model]
        result_storage[model] = result

    def _cache_sample(text, model):
        """Cache the sample of ``model`` for a prepared sentence.

        Returns True when the sample is cached afterwards (already or newly),
        False otherwise.
        """
        # skip caching if not hardcoded sentence
        if text not in sents:
            return False

        # TODO:replace cached with newer version
        if any(
            cached_sample.transcript == text and cached_sample.modelName == model
            for cached_sample in cached_samples
        ):
            return True

        try:
            cached_samples.append(Sample(results[model], text, model))
        except Exception:
            print('Error when trying to cache sample')
            return False
        return True

    def _is_zero_gpu_space(model):
        """Tell whether ``model`` has a Space entry flagged is_zero_gpu_space."""
        try:
            return bool(HF_SPACES[model]['is_zero_gpu_space'])
        except (KeyError, TypeError):
            return False

    mdl1k = mdl1
    mdl2k = mdl2
    print(mdl1k, mdl2k)
    if mdl1 in AVAILABLE_MODELS.keys(): mdl1k=AVAILABLE_MODELS[mdl1]
    if mdl2 in AVAILABLE_MODELS.keys(): mdl2k=AVAILABLE_MODELS[mdl2]
    results = {}
    # Exceptions raised inside worker threads, keyed by the model passed in.
    thread_errors = {}
    print(f"Sending models {mdl1k} and {mdl2k} to API")

    def _predict_in_thread(model):
        """Thread target: keep the exception so the request thread can report it."""
        try:
            predict_and_update_result(text, model, results, request)
        except Exception as e:
            thread_errors[model] = e

    # do not use multithreading when both spaces are ZeroGPU type
    if _is_zero_gpu_space(mdl1) and _is_zero_gpu_space(mdl2):
        # run Zero-GPU spaces one at a time
        predict_and_update_result(text, mdl1k, results, request)
        _cache_sample(text, mdl1k)

        predict_and_update_result(text, mdl2k, results, request)
        _cache_sample(text, mdl2k)
    else:
        # use multithreading
        thread1 = threading.Thread(target=_predict_in_thread, args=(mdl1k,))
        thread2 = threading.Thread(target=_predict_in_thread, args=(mdl2k,))

        thread1.start()
        # wait 3 seconds to calm hf.space domain
        time.sleep(3)
        thread2.start()
        # timeout in 2 minutes
        thread1.join(120)
        thread2.join(120)

        # cache the result
        for model in [mdl1k, mdl2k]:
            _cache_sample(text, model)

    # A failed or timed-out worker leaves no entry in results; report that as
    # a gr.Error instead of failing with a KeyError in the return statement.
    for model in (mdl1k, mdl2k):
        if model in results:
            continue
        error = thread_errors.get(model)
        if isinstance(error, gr.Error):
            raise error
        if error is not None:
            raise gr.Error(f"{model}:"+ repr(error)) from error
        raise gr.Error(f"{model}: Failed to call model")

    print(f"Retrieving models {mdl1k} and {mdl2k} from API")
    return (
        text,
        "Synthesize",
        gr.update(visible=True), # r2
        mdl1, # model1
        mdl2, # model2
        gr.update(visible=True, value=results[mdl1k], autoplay=autoplay), # aud1
        gr.update(visible=True, value=results[mdl2k], autoplay=False), # aud2
        gr.update(visible=True, interactive=False), #abetter
        gr.update(visible=True, interactive=False), #bbetter
        gr.update(visible=False), #prevmodel1
        gr.update(visible=False), #prevmodel2
        gr.update(visible=False), #nxt round btn
        # reset gr.State aplayed & bplayed
        False, #aplayed
        False, #bplayed
    )

# Battle Mode

def synthandreturn_battle(text, mdl1, mdl2, autoplay):
    """Synthesize ``text`` with two user-chosen models and return the UI updates.

    Args:
        text: Text to synthesize; surrounding whitespace is stripped.
        mdl1: First model; must differ from ``mdl2``.
        mdl2: Second model.
        autoplay: Used as the ``autoplay`` value of the first audio update.

    Returns:
        A 12-tuple: the stripped text, the label "Synthesize", and the
        values/updates annotated inline in the return statement.

    Raises:
        gr.Error: If validation fails or a model does not deliver audio.
    """
    if mdl1 == mdl2:
        raise gr.Error('You can\'t pick two of the same models.')
    text = text.strip()
    # Checked first so that empty input gets the dedicated message instead of
    # the generic "longer than N characters" one.
    if not text:
        raise gr.Error('You did not enter any text')
    if len(text) > MAX_SAMPLE_TXT_LENGTH:
        raise gr.Error(f'You exceeded the limit of {MAX_SAMPLE_TXT_LENGTH} characters')
    if len(text) < MIN_SAMPLE_TXT_LENGTH:
        raise gr.Error(f'Please input a text longer than {MIN_SAMPLE_TXT_LENGTH} characters')
    if (
        # test toxicity if not prepared text
        text not in sents
        and check_toxicity(text)
    ):
        print(f'Detected toxic content! "{text}"')
        raise gr.Error('Your text failed the toxicity test')
    # Language detection is advisory only; any failure is ignored.
    try:
        if detect(text) != "en":
            gr.Warning('Warning: The input text may not be in English')
    except Exception:
        pass
    log_text(text)
    print("[debug] Using", mdl1, mdl2)

    def predict_and_update_result(text, model, result_storage):
        """Synthesize ``text`` with ``model`` and store the result in ``result_storage``."""
        try:
            if model in AVAILABLE_MODELS:
                result = router.predict(text, AVAILABLE_MODELS[model].lower(), api_name="/synthesize")
            else:
                result = router.predict(text, model.lower(), api_name="/synthesize")
        except Exception as e:
            raise gr.Error('Unable to call API, please try again :)') from e
        print('Done with', model)

        # Best-effort post-processing; on failure the result is kept unchanged.
        try:
            with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
                audio = AudioSegment.from_file(result)
                current_sr = audio.frame_rate
                if current_sr > 24000:
                    audio = audio.set_frame_rate(24000)
                try:
                    print('Trying to normalize audio')
                    audio = match_target_amplitude(audio, -20)
                except Exception:
                    print('[WARN] Unable to normalize audio')
                audio.export(f.name, format="wav")
                os.unlink(result)
                result = f.name
        except Exception:
            pass
        if model in AVAILABLE_MODELS.keys(): model = AVAILABLE_MODELS[model]
        print(model)
        print(f"Running model {model}")
        result_storage[model] = result

    mdl1k = mdl1
    mdl2k = mdl2
    print(mdl1k, mdl2k)
    if mdl1 in AVAILABLE_MODELS.keys(): mdl1k=AVAILABLE_MODELS[mdl1]
    if mdl2 in AVAILABLE_MODELS.keys(): mdl2k=AVAILABLE_MODELS[mdl2]
    results = {}
    # Exceptions raised inside worker threads, keyed by the model passed in.
    thread_errors = {}

    def _predict_in_thread(model):
        """Thread target: keep the exception so the request thread can report it."""
        try:
            predict_and_update_result(text, model, results)
        except Exception as e:
            thread_errors[model] = e

    print(f"Sending models {mdl1k} and {mdl2k} to API")
    thread1 = threading.Thread(target=_predict_in_thread, args=(mdl1k,))
    thread2 = threading.Thread(target=_predict_in_thread, args=(mdl2k,))

    thread1.start()
    thread2.start()
    thread1.join()
    thread2.join()

    # A failed worker leaves no entry in results; report that as a gr.Error
    # instead of failing with a KeyError in the return statement.
    for model in (mdl1k, mdl2k):
        if model in results:
            continue
        error = thread_errors.get(model)
        if isinstance(error, gr.Error):
            raise error
        raise gr.Error('Unable to call API, please try again :)') from error

    print(f"Retrieving models {mdl1k} and {mdl2k} from API")
    return (
        text,
        "Synthesize",
        gr.update(visible=True), # r2
        mdl1, # model1
        mdl2, # model2
        gr.update(visible=True, value=results[mdl1k], autoplay=autoplay), # aud1
        gr.update(visible=True, value=results[mdl2k], autoplay=False), # aud2
        gr.update(visible=True, interactive=False), #abetter
        gr.update(visible=True, interactive=False), #bbetter
        gr.update(visible=False), #prevmodel1
        gr.update(visible=False), #prevmodel2
        gr.update(visible=False), #nxt round btn
    )

def randomsent():
    """Return a 3-tuple: '⚡', a random entry of ``sents``, and '🎲'."""
    sentence = random.choice(sents)
    return ('⚡', sentence, '🎲')
def randomsent_battle():
    """Return the values of randomsent() followed by the two models of random_m()."""
    sentence_outputs = randomsent()
    model_pair = random_m()
    return (*sentence_outputs, *model_pair)
def clear_stuff():
    """Return the list of values/updates that resets the synthesis UI.

    The first element is a visible update with an empty value, both model
    names are emptied, every other update is hidden, and both "played" flags
    are False.
    """
    hidden = {'visible': False}
    hidden_inactive = {'visible': False, 'interactive': False}
    hidden_audio = {'visible': False, 'interactive': False, 'autoplay': False}

    outputs = [
        gr.update(visible=True, value="", elem_classes=[]),
        "Synthesize",
        gr.update(**hidden), # r2
        '', # model1
        '', # model2
    ]
    # aud1, aud2
    outputs.extend(gr.update(**hidden_audio) for _ in range(2))
    # abetter, bbetter
    outputs.extend(gr.update(**hidden_inactive) for _ in range(2))
    # prevmodel1, prevmodel2, nxt round btn
    outputs.extend(gr.update(**hidden) for _ in range(3))
    # aplayed, bplayed
    outputs.extend([False, False])
    return outputs