import gradio as gr
import soundfile as sf
from arabic_pronounce import phonetise
from espnet2.bin.tts_inference import Text2Speech
from scipy.io import wavfile

title = "Tunisian Text To Speech"

description = """
This is a demo for our Tunisian TTS system. You can write your diacritized Tunisian text to synthesize the corresponding speech.
This project was developed with the purpose of bridging the gap between high-resource and low-resource languages.

If you need help, feel free to drop an email here :

fethi.bougares@elyadata.com
rami.kammoun@algobrain.ai
imen.laouirine@elyadata.com

Authors :
* [Imen Laouirine](https://www.linkedin.com/in/imen-laouirine-9a557b209)
* [Rami Kammoun](https://www.linkedin.com/in/rami-kammoun/)
* [Fethi Bougares](https://www.linkedin.com/in/fethi-bougares/)

More implementation details can be found in [![GitHub](https://img.shields.io/badge/github-%23121011.svg?style=for-the-badge&logo=github&logoColor=white)](https://github.com/elyadata/TunArTTS/tree/develop)

More in-depth details and insights are available in a released preprint. Please find the paper [here](paper_link).

If you use or refer to this model, please cite :
"""

examples = [
    ["يْكِنّْلُو مَشَاعِرْ قْوِيَّة يْكِنّْلُو مَشَاعِرْ قْوِيَّة"],
    ["سَارَقْ وْفِي يِدُّو شَمْعَة"],
    ["صَامْ نْهَارْ مِنْ رُمْضَانْ، قَالْ العِيدْ آشْ مَازَالُو؟"],
    ["ضَحْكُولُو تْمَدْ عْلَى طُولُو"],
    ["عَارِكْ وْخَلِّي لِلْصُلْحْ مْكَانْ"],
]

# Load the acoustic model and the vocoder once at start-up rather than on every request.
tts = Text2Speech.from_pretrained(
    model_file="exp/tts_train_conformer_fastspeech2_raw_phn_none/train.loss.ave_5best.pth",
    vocoder_file="train_tun_parallel_wavegan.v3/checkpoint-560000steps.pkl",
)


def text_to_phoneme(tun_text):
    """Phonetise each word of the diacritized text and wrap the sequence with silence tokens."""
    phonemes = [phonetise(word)[0] for word in tun_text.split(" ")]
    return "sil " + " ".join(phonemes).strip() + " sil"


def generate_tts(input_text):
    """Synthesize speech for the given text and write it to output.wav (22.05 kHz)."""
    # text_to_phoneme already adds the leading/trailing "sil" tokens, so pass it as-is.
    phonemized_text = text_to_phoneme(input_text)
    wav = tts(phonemized_text)["wav"]
    audio_data = wav.numpy()
    sf.write("output.wav", audio_data, samplerate=22050)


def generate_audio(inputs):
    generate_tts(inputs)
    sr, audio_data = wavfile.read("output.wav")
    return sr, audio_data


demo = gr.Interface(
    title=title,
    description=description,
    fn=generate_audio,
    examples=examples,
    inputs=gr.Text(label="Input Text"),
    outputs="audio",
)

if __name__ == "__main__":
    demo.launch()
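
# Once the demo is running, it can also be queried programmatically. A minimal
# sketch, assuming the default local Gradio URL (http://127.0.0.1:7860), the
# gradio_client package, and the default "/predict" endpoint that a single
# gr.Interface exposes:
#
#     from gradio_client import Client
#
#     client = Client("http://127.0.0.1:7860/")
#     result = client.predict(
#         "صَامْ نْهَارْ مِنْ رُمْضَانْ، قَالْ العِيدْ آشْ مَازَالُو؟",
#         api_name="/predict",
#     )
#     print(result)  # path to the synthesized audio file returned by the server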