# Hugging Face Space page header captured with the code (status: Runtime error)
import os | |
import json | |
import openai | |
import tempfile | |
import gradio as gr | |
import infer | |
import config | |
from neon_tts_plugin_coqui import CoquiTTS | |
title = "Speech to ChatGPT to Speech"

# Coqui TTS engine; the UI offers its language codes plus 'cn' and 'jp'.
coquiTTS = CoquiTTS()
LANGUAGES = [*CoquiTTS.langs.keys(), 'cn', 'jp']
default_lang = "en"

# Whisper Space loaded as a callable interface (used by translate()).
whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")

# The OpenAI key is read from the 'api_key' environment variable.
api_key = os.getenv('api_key')
openai.api_key = api_key

# Checkpoint and config locations come from the local config module.
pth_path = config.pth_path
config_json = config.config_json
net_g_ms, hps = infer.load_model(config_json, pth_path)
# ChatGPT
def chat_hf(audio, custom_token, language):
    """Transcribe speech, ask the completion model for a reply, and voice it.

    Args:
        audio: Value from the input audio component; passed unchanged to
            ``translate``.
        custom_token: Accepted so the UI wiring keeps working; this function
            does not use it.
        language: Selected language code. ``'cn'`` and ``'jp'`` are
            synthesized through ``infer``; every other code goes to
            ``coquiTTS``.

    Returns:
        A ``(whisper_text, gpt_response, voice_out)`` tuple. ``voice_out`` is
        a ``(sampling_rate, audio)`` pair for ``'cn'``/``'jp'`` and the path
        of a temporary ``.wav`` file otherwise. ``whisper_text`` is an empty
        string when transcription itself failed.
    """
    missing_audio_error = (
        "ERROR: You have to either use the microphone or upload an audio file"
    )

    # Keep a defined transcript even if translate() raises, instead of
    # calling it a second time inside the handler (which could raise again).
    whisper_text = ""
    try:
        whisper_text = translate(audio)
        if whisper_text == missing_audio_error:
            gpt_response = "MISSING AUDIO: Record your voice by clicking the microphone button, do not forget to stop recording before sending your message ;)"
        else:
            gpt_response = openai_create(whisper_text)
    except Exception as exc:
        # Best-effort: report the failure and answer with a fallback message.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate.
        print(f"chat_hf failed: {exc!r}")
        gpt_response = """Sorry, I'm quite busy right now, but please try again later :)"""

    # to voice
    # Membership test: the previous `language == 'cn' or 'jp'` was always
    # truthy, so the Coqui branch below could never run.
    if language in ('cn', 'jp'):
        text = infer.clean_text(gpt_response)
        audio = infer.infer(text, net_g_ms, 2, "demo")
        voice_out = (hps.data.sampling_rate, audio)
    else:
        # delete=False keeps the file on disk after the handle is closed so
        # its path can be returned.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})
        voice_out = fp.name
    return whisper_text, gpt_response, voice_out
def translate(audio):
    """Send *audio* to the loaded Whisper interface and return its output.

    The result is printed before being returned.
    """
    banner = """
—
Sending audio to Whisper ...
—
"""
    print(banner)
    transcript = whisper(audio, None, "transcribe", fn_index=0)
    print(transcript)
    return transcript
def openai_create(prompt):
    """Request a completion for *prompt* and return the first choice's text.

    The returned text is also printed.
    """
    request = {
        "model": "text-davinci-003",
        "prompt": prompt,
        "temperature": 0.9,
        "max_tokens": 150,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0.6,
        "stop": [" Human:", " AI:"],
    }
    completion = openai.Completion.create(**request)
    reply = completion.choices[0].text
    print(reply)
    return reply
# UI: language selector on top, inputs in the left column, outputs in the
# right column. Components are created in the same order as before.
with gr.Blocks() as blocks:
    gr.Markdown(f"<h1 style='text-align: center; margin-bottom: 1rem'>{title}</h1>")
    radio = gr.Radio(label="Language", choices=LANGUAGES, value=default_lang)

    with gr.Row(equal_height=True):
        # Input side: microphone recording, optional token, submit button.
        with gr.Column():
            audio_file = gr.Audio(source="microphone", type="filepath")
            custom_token = gr.Textbox(
                label='If it fails, use your own session token',
                placeholder="your own session token",
            )
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")

        # Output side: transcript, model reply, synthesized audio.
        with gr.Column():
            text1 = gr.Textbox(label="Speech to Text")
            text2 = gr.Textbox(label="ChatGPT Response")
            audio = gr.Audio(label="Output", interactive=False)

    # actions: run chat_hf on submit and route its three results to outputs
    submit.click(
        fn=chat_hf,
        inputs=[audio_file, custom_token, radio],
        outputs=[text1, text2, audio],
    )

blocks.launch(debug=True)