import os
import json
import openai
import tempfile
import gradio as gr
import infer
import config
from neon_tts_plugin_coqui import CoquiTTS
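
# Coqui TTS covers most of the supported languages; 'cn' and 'jp' are handled
# instead by a local VITS model loaded below via infer/config.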
title = "Speech to ChatGPT to Speech"
coquiTTS = CoquiTTS()
LANGUAGES = list(CoquiTTS.langs.keys())
LANGUAGES = LANGUAGES + ['cn', 'jp']
default_lang = "en"
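
# Load the hosted Whisper large-v2 Space as a callable speech-to-text function.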
whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")

# Read the OpenAI API key from the environment (set it as a secret named 'api_key').
api_key = os.environ.get('api_key')
openai.api_key = api_key
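
# Load the VITS checkpoint and hyperparameters used for Chinese/Japanese synthesis.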
pth_path = config.pth_path
config_json = config.config_json
net_g_ms, hps = infer.load_model(config_json, pth_path)

# Pipeline: transcribe with Whisper, generate a reply with GPT, then synthesize speech.
def chat_hf(audio, custom_token, language):
    # Note: custom_token comes from the UI but is unused with the official OpenAI API.
    try:
        whisper_text = translate(audio)
        if whisper_text == "ERROR: You have to either use the microphone or upload an audio file":
            gpt_response = "MISSING AUDIO: Record your voice by clicking the microphone button, do not forget to stop recording before sending your message ;)"
        else:
            gpt_response = openai_create(whisper_text)
    except Exception:
        # Retry transcription so the user still sees their text if only the GPT call failed.
        whisper_text = translate(audio)
        gpt_response = "Sorry, I'm quite busy right now, but please try again later :)"

    # Convert the response text to speech.
    if language in ('cn', 'jp'):
        # The VITS model expects text without spaces or line breaks.
        text = gpt_response.strip().replace(' ', '').replace('\n', '').replace('\r', '')
        text = infer.clean_text(text)
        audio = infer.infer(text, net_g_ms, 0, "demo")
        voice_out = (hps.data.sampling_rate, audio)
    else:
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
            coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})
        voice_out = fp.name
    return whisper_text, gpt_response, voice_out

def translate(audio):
    # Despite its name, this transcribes the recording rather than translating it.
    print("Sending audio to Whisper ...")
    text_result = whisper(audio, None, "transcribe", fn_index=0)
    print(text_result)
    return text_result

def openai_create(prompt):
    print("Getting response from OpenAI ...")
    # text-davinci-003 is a legacy completions model; the stop sequences keep it
    # from writing both sides of the dialogue.
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0.9,
        max_tokens=150,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0.6,
        stop=[" Human:", " AI:"]
    )
    print(response.choices[0].text)
    return response.choices[0].text
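
# Gradio UI: a language selector and microphone input on the left; transcript,
# response text, and synthesized audio on the right.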

with gr.Blocks() as blocks:
    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>" + title + "</h1>")
    radio = gr.Radio(label="Language", choices=LANGUAGES, value=default_lang)
    with gr.Row(equal_height=True):
        with gr.Column():
            audio_file = gr.Audio(source="microphone", type="filepath")
            custom_token = gr.Textbox(label='If it fails, use your own session token', placeholder="your own session token")
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")
        with gr.Column():
            text1 = gr.Textbox(label="Speech to Text")
            text2 = gr.Textbox(label="ChatGPT Response")
            audio = gr.Audio(label="Output", interactive=False)

    # Wire the submit button to the pipeline.
    submit.click(
        chat_hf,
        [audio_file, custom_token, radio],
        [text1, text2, audio],
    )

blocks.launch(debug=True)
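
# To run locally: set the 'api_key' environment variable, then execute this file;
# Gradio serves the demo on http://127.0.0.1:7860 by default.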