Spaces:

startharik
/

ms-asr-transducer

Build error

ms-asr-transducer / app.py

Update app.py

44b82fc about 2 years ago

1.67 kB

	import os

	# These variables are assigned before `malaya_speech` is imported below, so
	# they are already present in the environment when that import runs.
	# NOTE(review): an empty CUDA_VISIBLE_DEVICES presumably hides every GPU
	# (CPU-only inference) — confirm this is intended for the deployment.
	os.environ['CUDA_VISIBLE_DEVICES'] = ''
	os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

	import malaya_speech
	from malaya_speech.utils.astype import float_to_int
	from pyctcdecode import build_ctcdecoder
	from malaya_speech.utils.char import CTC_VOCAB
	from glob import glob
	import kenlm
	import gradio as gr
	import logging
	import json

	logging.basicConfig(level=logging.INFO)

	# Sample rate that every input is resampled to before decoding.
	SR = 16000
	# Cache of loaded models keyed by model name; filled on first use by `tts`.
	MODELS = {}
	# Model names offered in the UI dropdown.
	AVAILABLE_MODELS = malaya_speech.stt.available_transducer().index.tolist()

	# glob() yields paths in arbitrary, platform-dependent order; sort them so
	# the example list built from `wavs` is stable across restarts.
	wavs = sorted(glob('audio/*.wav'))

	def load_audio_wav(filename):
	    """Load an audio file through ``malaya_speech.load``.

	    The path is echoed to stdout before the file is read.

	    Args:
	        filename: Path of the audio file to load.

	    Returns:
	        A ``(samples, sample_rate)`` pair, as unpacked from
	        ``malaya_speech.load``.
	    """
	    print(filename)
	    samples, sample_rate = malaya_speech.load(filename)
	    return samples, sample_rate

	def tts(upload, record, model):
	    """Transcribe audio with the selected transducer model.

	    Despite the name, this is the speech-to-text callback wired into the
	    Gradio interface: it takes audio in and returns decoded text.

	    Args:
	        upload: Path of an uploaded audio file, or a falsy value when no
	            file was provided.
	        record: ``(sample_rate, samples)`` pair from the microphone input,
	            or a falsy value when nothing was recorded. When present it
	            takes priority over ``upload``.
	        model: Model name passed to ``malaya_speech.stt.deep_transducer``.

	    Returns:
	        The first (and only) result of greedy decoding for the input.

	    Raises:
	        ValueError: If neither a recording nor an uploaded file is given.
	    """
	    if record:
	        rate, y = record
	    elif upload:
	        y, rate = load_audio_wav(upload)
	    else:
	        # Fail with a clear message instead of passing None to the loader.
	        raise ValueError(
	            'No audio provided: upload a WAV file or record using the microphone.'
	        )

	    # Assumes a 2-D array is laid out as (samples, channels); only the first
	    # channel is kept.
	    if len(y.shape) == 2:
	        y = y.T[0]

	    # NOTE(review): microphone samples may arrive as integer PCM while file
	    # loads may be float — confirm the model is insensitive to that scale.
	    logging.info('input audio: %d samples at rate %s', len(y), rate)

	    y_16k = malaya_speech.resample(y, rate, SR)

	    # Models are loaded on first use and kept in the module-level cache.
	    # MODELS is only mutated, never rebound, so no `global` is required.
	    if model not in MODELS:
	        logging.info(f'{model} not in MODELS')
	        MODELS[model] = malaya_speech.stt.deep_transducer(model=model)

	    return MODELS[model].greedy_decoder([y_16k])[0]

	# One example row per bundled WAV file: [upload path, no recording, model].
	examples = [[wav_path, None, 'conformer'] for wav_path in wavs]

	# Gradio UI: two audio inputs (file upload and microphone) plus a model
	# dropdown, all handed to `tts`, whose return value is shown as text.
	demo = gr.Interface(
	    title='ASR TRANSDUCER - TNB VOICE',
	    description='Fastest',
	    fn=tts,
	    inputs=[
	        gr.Audio(type='filepath', source='upload', label='upload WAV file'),
	        gr.Audio(source='microphone', label='or record using microphone'),
	        gr.components.Dropdown(
	            choices=AVAILABLE_MODELS,
	            value='conformer',
	            label='Available models',
	        ),
	    ],
	    outputs=['text'],
	    examples=examples,
	    cache_examples=False,
	)

	# Start the server only when this file is executed as a script, so that
	# importing the module (e.g. from tooling) does not launch the web app.
	if __name__ == '__main__':
	    demo.launch(server_name='0.0.0.0')