Spaces:
Runtime error
Runtime error
File size: 3,349 Bytes
dda3d27 4f485d9 dda3d27 4f485d9 dda3d27 4f485d9 dda3d27 4f485d9 0694ca8 4f485d9 31a365d 4f485d9 e0fcf8f 82346b0 3828e69 21789e6 2418a12 dda3d27 2418a12 4f485d9 dda3d27 4f485d9 3828e69 4f485d9 82346b0 4f485d9 dda3d27 4f485d9 dda3d27 4f485d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
import os
import json
import openai
import tempfile
import gradio as gr
import infer
import config
from neon_tts_plugin_coqui import CoquiTTS
# Module-level setup: everything below runs once at import time, in order.

# Heading text rendered at the top of the page.
title = "Speech to ChatGPT to Speech"
# Coqui TTS engine; chat_hf uses it for every language except 'cn' and 'jp'.
coquiTTS = CoquiTTS()
# Language codes offered in the UI: the ones CoquiTTS lists, plus 'cn' and 'jp',
# which chat_hf voices through the `infer` model instead of CoquiTTS.
LANGUAGES = list(CoquiTTS.langs.keys())
LANGUAGES = LANGUAGES + ['cn', 'jp']
# Language selected by default in the radio selector.
default_lang = "en"
# Callable loaded from the named Space; translate() sends audio through it.
whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")
# The OpenAI key is read from the `api_key` environment variable
# (os.environ.get returns None when the variable is not set).
api_key = os.environ.get('api_key')
# Hand the key to the openai client used by openai_create().
openai.api_key = api_key
# Checkpoint and configuration locations come from the project-local `config` module.
pth_path = config.pth_path
config_json = config.config_json
# Load the model used for the 'cn'/'jp' voices; chat_hf reads the output
# sampling rate from `hps.data.sampling_rate`.
net_g_ms, hps = infer.load_model(config_json, pth_path)
# ChatGPT
def chat_hf(audio, custom_token, language):
    """Transcribe *audio*, obtain a text reply for it, and voice that reply.

    Args:
        audio: Recording forwarded unchanged to ``translate``.
        custom_token: Accepted so the signature matches the UI inputs; it is
            not used by this function.
        language: Language code. 'cn' and 'jp' are voiced with the ``infer``
            model, every other value with ``coquiTTS``.

    Returns:
        A ``(whisper_text, gpt_response, voice)`` tuple. ``voice`` is a
        ``(sampling_rate, samples)`` pair for 'cn'/'jp', otherwise the path
        of a temporary ``.wav`` file that is left on disk for the caller.
    """
    missing_audio = "ERROR: You have to either use the microphone or upload an audio file"

    # Pre-initialise so the fallback path below always has a transcription
    # to return, even when the transcription request itself is what failed.
    whisper_text = ""
    try:
        whisper_text = translate(audio)
        if whisper_text == missing_audio:
            gpt_response = "MISSING AUDIO: Record your voice by clicking the microphone button, do not forget to stop recording before sending your message ;)"
        else:
            gpt_response = openai_create(whisper_text)
    except Exception as err:
        # Both calls go to remote services and may fail. Log the cause and
        # answer with an apology instead of re-sending the audio: repeating
        # the transcription here would double the request and would raise
        # again if the transcription was the call that failed.
        print(f"chat_hf: request failed: {err!r}")
        gpt_response = """Sorry, I'm quite busy right now, but please try again later :)"""

    # to voice
    print(language)
    if language in ['cn', 'jp']:
        # Drop all spaces and line breaks before handing the text to the model.
        text = gpt_response.strip().replace(' ', '').replace('\n', '').replace('\r', '')
        text = infer.clean_text(text)
        samples = infer.infer(text, net_g_ms, 0, "demo")
        voice_out = (hps.data.sampling_rate, samples)
        return whisper_text, gpt_response, voice_out

    # delete=False keeps the file after the handle is closed so the caller
    # can still read it by name.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})
    return whisper_text, gpt_response, fp.name
def translate(audio):
    """Run *audio* through the ``whisper`` callable and return its result.

    A banner is printed before the request and the result is printed after it.
    """
    banner = """
—
Sending audio to Whisper ...
—
"""
    print(banner)
    transcript = whisper(audio, None, "transcribe", fn_index=0)
    print(transcript)
    return transcript
def openai_create(prompt):
    """Request a completion for *prompt* and return the first choice's text.

    A banner is printed before the request and the generated text after it.
    """
    banner = """
—
Giving response from ai ...
—
"""
    print(banner)

    # Request parameters, gathered in one place and passed as keyword arguments.
    request = {
        "model": "text-davinci-003",
        "prompt": prompt,
        "temperature": 0.9,
        "max_tokens": 150,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0.6,
        "stop": [" Human:", " AI:"],
    }
    completion = openai.Completion.create(**request)

    reply = completion.choices[0].text
    print(reply)
    return reply
# Page layout: heading and language selector on top, then a row with the
# inputs in the first column and the results in the second.
with gr.Blocks() as blocks:
    gr.Markdown(
        "<h1 style='text-align: center; margin-bottom: 1rem'>" + title + "</h1>"
    )
    radio = gr.Radio(label="Language", choices=LANGUAGES, value=default_lang)

    with gr.Row(equal_height=True):
        # Inputs: microphone recording, token field and the submit button.
        with gr.Column():
            audio_file = gr.Audio(source="microphone", type="filepath")
            custom_token = gr.Textbox(
                label='If it fails, use your own session token',
                placeholder="your own session token",
            )
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")

        # Results: transcription, text reply and the spoken reply.
        with gr.Column():
            text1 = gr.Textbox(label="Speech to Text")
            text2 = gr.Textbox(label="ChatGPT Response")
            audio = gr.Audio(label="Output", interactive=False)

    # Wire the button to chat_hf; inputs and outputs follow the order of
    # chat_hf's parameters and return tuple.
    submit.click(
        fn=chat_hf,
        inputs=[audio_file, custom_token, radio],
        outputs=[text1, text2, audio],
    )

blocks.launch(debug=True)
|