Chen, Yusin committed on
Commit
4f485d9
·
0 Parent(s):

Duplicate from Yusin/Speech-ChatGPT-Speech

Browse files
Files changed (7) hide show
  1. .gitattributes +27 -0
  2. .gitignore +1 -0
  3. README.md +41 -0
  4. app.py +132 -0
  5. packages.txt +2 -0
  6. pygpt.py +112 -0
  7. requirements.txt +3 -0
.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.onnx filter=lfs diff=lfs merge=lfs -text
13
+ *.ot filter=lfs diff=lfs merge=lfs -text
14
+ *.parquet filter=lfs diff=lfs merge=lfs -text
15
+ *.pb filter=lfs diff=lfs merge=lfs -text
16
+ *.pt filter=lfs diff=lfs merge=lfs -text
17
+ *.pth filter=lfs diff=lfs merge=lfs -text
18
+ *.rar filter=lfs diff=lfs merge=lfs -text
19
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
21
+ *.tflite filter=lfs diff=lfs merge=lfs -text
22
+ *.tgz filter=lfs diff=lfs merge=lfs -text
23
+ *.wasm filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
README.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Speech2ChatGPT2Speech
3
+ emoji: 🗣️🙉
4
+ colorFrom: indigo
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ python_version: 3.9
8
+ sdk_version: 3.12.0
9
+ app_file: app.py
10
+ models:
11
+ - neongeckocom/tts-vits-ljspeech-en
12
+ - neongeckocom/tts-vits-css10-es
13
+ - neongeckocom/tts-vits-css10-fr
14
+ - neongeckocom/tts-vits-css10-de
15
+ - neongeckocom/tts-vits-cv-it
16
+ - neongeckocom/tts-vits-mai-pl
17
+ - neongeckocom/tts-vits-mai-uk
18
+ - neongeckocom/tts-vits-cv-ro
19
+ - neongeckocom/tts-vits-css10-hu
20
+ - neongeckocom/tts-vits-cv-el
21
+ - neongeckocom/tts-vits-cv-cs
22
+ - neongeckocom/tts-vits-cv-sv
23
+ - neongeckocom/tts-vits-cv-pt
24
+ - neongeckocom/tts-vits-cv-bg
25
+ - neongeckocom/tts-vits-cv-hr
26
+ - neongeckocom/tts-vits-cv-da
27
+ - neongeckocom/tts-vits-cv-sk
28
+ - neongeckocom/tts-vits-css10-nl
29
+ - neongeckocom/tts-vits-css10-fi
30
+ - neongeckocom/tts-vits-cv-lt
31
+ - neongeckocom/tts-vits-cv-sl
32
+ - neongeckocom/tts-vits-cv-lv
33
+ - neongeckocom/tts-vits-cv-et
34
+ - neongeckocom/tts-vits-cv-ga
35
+ - neongeckocom/tts-vits-cv-mt
36
+ pinned: false
37
+ license: apache-2.0
38
+ duplicated_from: Yusin/Speech-ChatGPT-Speech
39
+ ---
40
+
41
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
app.py ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ import gradio as gr
3
+ from neon_tts_plugin_coqui import CoquiTTS
4
# Languages offered in the UI: every language the Coqui plugin reports,
# followed by 'cn' and 'jp'.
# NOTE(review): nothing visible in this file routes 'cn'/'jp' to a dedicated
# TTS backend -- confirm CoquiTTS.get_tts accepts them.
LANGUAGES = [*CoquiTTS.langs.keys(), 'cn', 'jp']
default_lang = "en"

# Speech-to-text is delegated to a hosted Whisper Space loaded through Gradio
# (a local whisper model and other Spaces were tried earlier and dropped).
whisper = gr.Interface.load(name="spaces/sanchit-gandhi/whisper-large-v2")

import os
import json
import openai

# The OpenAI key comes from the `api_key` environment variable and is
# installed on the openai module for all later requests.
openai.api_key = api_key = os.environ.get('api_key')

# Heading text rendered at the top of the page.
title = "Speech to ChatGPT to Speech"

# Single shared TTS engine used to synthesise every reply.
coquiTTS = CoquiTTS()
26
+
27
+
28
+ # ChatGPT
29
def chat_hf(audio, custom_token, language):
    """Run one speech -> text -> completion -> speech round trip.

    Args:
        audio: Value of the microphone ``gr.Audio`` input (declared with
            ``type="filepath"`` in this file); forwarded to ``translate``.
        custom_token: Text from the session-token box. Accepted so the
            signature matches the click handler's inputs; currently unused.
        language: Language code selected in the UI, passed to the TTS
            plugin as ``speaker={"language": language}``.

    Returns:
        A ``(whisper_text, gpt_response, wav_path)`` tuple: the transcript
        (empty string if transcription itself failed), the reply text, and
        the path of the temporary ``.wav`` file holding the spoken reply.
    """
    # Default so the transcript is defined even if translate() raises.
    whisper_text = ""
    try:
        whisper_text = translate(audio)
        if whisper_text == "ERROR: You have to either use the microphone or upload an audio file":
            gpt_response = "MISSING AUDIO: Record your voice by clicking the microphone button, do not forget to stop recording before sending your message ;)"
        else:
            gpt_response = openai_create(whisper_text)
    except Exception as exc:
        # Catch Exception (not a bare except) so Ctrl-C / SystemExit still
        # propagate, and do not call translate() again: if it was the call
        # that failed it would simply fail again, and if it succeeded the
        # transcript is already in whisper_text.
        print(f"chat_hf: request failed: {exc!r}")
        gpt_response = """Sorry, I'm quite busy right now, but please try again later :)"""

    # to voice
    # delete=False keeps the file on disk after the handle closes so the
    # returned path can still be read by the output audio component.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        coquiTTS.get_tts(gpt_response, fp, speaker={"language": language})

    return whisper_text, gpt_response, fp.name
49
+
50
+ # whisper
51
+ #def translate(audio):
52
+ # print("""
53
+ # —
54
+ # Sending audio to Whisper ...
55
+ # —
56
+ # """)
57
+ #
58
+ # audio = whisper.load_audio(audio)
59
+ # audio = whisper.pad_or_trim(audio)
60
+ #
61
+ # mel = whisper.log_mel_spectrogram(audio).to(whisper_model.device)
62
+ #
63
+ # _, probs = whisper_model.detect_language(mel)
64
+ #
65
+ # transcript_options = whisper.DecodingOptions(task="transcribe", fp16 = False)
66
+ #
67
+ # transcription = whisper.decode(whisper_model, mel, transcript_options)
68
+ #
69
+ # print("language spoken: " + transcription.language)
70
+ # print("transcript: " + transcription.text)
71
+ # print("———————————————————————————————————————————")
72
+ #
73
+ # return transcription.text
74
+
75
def translate(audio):
    """Transcribe ``audio`` with the remote Whisper Space.

    The audio is handed to the module-level ``whisper`` interface with the
    "transcribe" task; whatever that call returns is logged to stdout and
    returned unchanged.
    """
    banner = """

Sending audio to Whisper ...

"""
    print(banner)
    transcript = whisper(audio, None, "transcribe", fn_index=0)
    print(transcript)
    return transcript
85
+
86
+
87
def openai_create(prompt):
    """Request a completion for ``prompt`` and return the first choice's text.

    The request goes through ``openai.Completion.create`` with the
    ``text-davinci-003`` model and fixed sampling settings; the returned text
    is also echoed to stdout.
    """
    request_options = dict(
        model="text-davinci-003",
        prompt=prompt,
        temperature=0.9,
        max_tokens=150,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0.6,
        stop=[" Human:", " AI:"],
    )
    completion = openai.Completion.create(**request_options)
    answer = completion.choices[0].text
    print(answer)
    return answer
101
+
102
# NOTE(review): the nesting below is reconstructed from a flattened view of
# the file -- confirm the row/column layout against the running app.
with gr.Blocks() as blocks:
    # Centered page heading built from the module-level title.
    heading_html = f"<h1 style='text-align: center; margin-bottom: 1rem'>{title}</h1>"
    gr.Markdown(heading_html)

    # Language selector for the spoken reply.
    radio = gr.Radio(label="Language", choices=LANGUAGES, value=default_lang)

    with gr.Row(equal_height=True):
        # Input side: microphone recording, optional token box, submit button.
        with gr.Column():
            audio_file = gr.Audio(source="microphone", type="filepath")
            custom_token = gr.Textbox(label='If it fails, use your own session token', placeholder="your own session token")
            with gr.Row():
                submit = gr.Button("Submit", variant="primary")
        # Output side: transcript, reply text, synthesized reply audio.
        with gr.Column():
            text1 = gr.Textbox(label="Speech to Text")
            text2 = gr.Textbox(label="ChatGPT Response")
            audio = gr.Audio(label="Output", interactive=False)

    # Wire the button: chat_hf receives (audio, token, language) and fills
    # the two text boxes plus the output audio player.
    handler_inputs = [audio_file, custom_token, radio]
    handler_outputs = [text1, text2, audio]
    submit.click(chat_hf, handler_inputs, handler_outputs)


blocks.launch(debug=True)
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ libsndfile1
2
+ espeak-ng
pygpt.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ import asyncio
3
+ import socketio
4
+ import datetime
5
+ import json
6
+ import base64
7
+
8
class PyGPT:
    """Asynchronous Socket.IO client for a ChatGPT relay ("bypass node").

    The client connects to ``bypass_node``, exchanges a session token for an
    auth token through the ``getSession`` event, and sends prompts through
    the ``askQuestion`` event while tracking per-conversation ids.

    The constructor schedules a background task with
    ``asyncio.create_task``, so an instance must be created while an event
    loop is running (i.e. from inside a coroutine).
    """

    def __init__(self, session_token, bypass_node='https://gpt.pawan.krd'):
        """Create the client and start the conversation-cleanup task.

        Args:
            session_token: Session token sent to the relay by ``get_tokens``.
            bypass_node: URL of the relay server to connect to.
        """
        self.ready = False
        self.socket = socketio.AsyncClient()
        self.socket.on('connect', self.on_connect)
        self.socket.on('disconnect', self.on_disconnect)
        self.session_token = session_token
        self.conversations = []
        self.auth = None
        # Naive UTC, so it is comparable with the UTC value parsed in
        # get_tokens().
        self.expires = self._utcnow()
        self.pause_token_checks = False
        self.bypass_node = bypass_node
        # Keep references to background tasks: the event loop only holds
        # weak references, so an unreferenced task may be garbage-collected
        # before it finishes.
        self._token_task = None
        self._cleanup_task = asyncio.create_task(self.cleanup_conversations())

    @staticmethod
    def _utcnow():
        """Return the current UTC time as a naive ``datetime``."""
        return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)

    async def connect(self):
        """Open the Socket.IO connection to ``self.bypass_node``."""
        await self.socket.connect(self.bypass_node)

    async def disconnect(self):
        """Disconnect from the relay and close the client socket."""
        await self.socket.disconnect()
        # NOTE(review): confirm that the installed python-socketio
        # AsyncClient actually exposes an awaitable ``close()``.
        await self.socket.close()

    def on_connect(self):
        """Socket 'connect' handler: start the token-refresh loop."""
        print('Connected to server')
        # A reconnect fires this handler again; only start a new refresher
        # when none is running, so duplicate loops do not pile up.
        if self._token_task is None or self._token_task.done():
            self._token_task = asyncio.create_task(self.check_tokens())

    def on_disconnect(self):
        """Socket 'disconnect' handler: mark the client as not ready."""
        print('Disconnected from server')
        self.ready = False

    async def check_tokens(self):
        """Poll every 0.5 s and refresh the auth token when needed.

        A refresh is requested when no auth token is held or when
        ``self.expires`` lies more than two minutes in the past.
        ``pause_token_checks`` acts as a simple re-entrancy flag.
        """
        while True:
            if self.pause_token_checks:
                await asyncio.sleep(0.5)
                continue
            self.pause_token_checks = True
            try:
                # ``expires`` is parsed from a 'Z'-suffixed (UTC) timestamp,
                # so compare against UTC rather than local time.
                now = self._utcnow()
                offset = datetime.timedelta(minutes=2)
                if self.expires < (now - offset) or not self.auth:
                    await self.get_tokens()
            finally:
                # Always release the flag; otherwise one failed refresh
                # would leave token checks paused forever.
                self.pause_token_checks = False
            await asyncio.sleep(0.5)

    async def cleanup_conversations(self):
        """Once a minute, drop conversations idle for two minutes or more."""
        while True:
            await asyncio.sleep(60)
            now = datetime.datetime.now()
            self.conversations = [c for c in self.conversations if now - c['last_active'] < datetime.timedelta(minutes=2)]

    def add_conversation(self, id):
        """Register and return a new conversation record for ``id``.

        The record starts with no server conversation id and a random
        parent id.
        """
        conversation = {
            'id': id,
            'conversation_id': None,
            'parent_id': uuid.uuid4(),
            'last_active': datetime.datetime.now()
        }
        self.conversations.append(conversation)
        return conversation

    def get_conversation_by_id(self, id):
        """Return the record for ``id``, creating it if missing.

        An existing record has its ``last_active`` timestamp refreshed.
        """
        conversation = next((c for c in self.conversations if c['id'] == id), None)
        if conversation is None:
            conversation = self.add_conversation(id)
        else:
            conversation['last_active'] = datetime.datetime.now()
        return conversation

    async def wait_for_ready(self):
        """Block (polling every 25 ms) until ``self.ready`` becomes true."""
        while not self.ready:
            await asyncio.sleep(0.025)
        print('Ready!!')

    async def ask(self, prompt, id='default'):
        """Send ``prompt`` within conversation ``id`` and return the answer.

        Args:
            prompt: Text to send to the relay.
            id: Local conversation key; a record is created on first use.

        Returns:
            The ``answer`` field of the relay's reply.

        Raises:
            RuntimeError: If the relay's reply contains an ``error`` field.
        """
        if not self.auth or not self.validate_token(self.auth):
            await self.get_tokens()
        conversation = self.get_conversation_by_id(id)
        data = await self.socket.call('askQuestion', {
            'prompt': prompt,
            'parentId': str(conversation['parent_id']),
            'conversationId': str(conversation['conversation_id']),
            'auth': self.auth
        })

        if 'error' in data:
            print(f'Error: {data["error"]}')
            # Fail with the server's message instead of falling through to
            # an opaque KeyError on the missing success fields below.
            raise RuntimeError(f'askQuestion failed: {data["error"]}')
        conversation['parent_id'] = data['messageId']
        conversation['conversation_id'] = data['conversationId']
        return data['answer']

    def validate_token(self, token):
        """Return True if ``token`` is a JWT whose ``exp`` is not yet past.

        Empty, malformed or undecodable tokens are reported as invalid
        (``False``) so that callers simply request a fresh token.
        """
        if not token:
            return False
        try:
            segment = token.split(".")[1]
            # JWT segments use the URL-safe base64 alphabet without padding;
            # restore the padding and decode with the matching alphabet.
            segment += "=" * (-len(segment) % 4)
            parsed = json.loads(base64.urlsafe_b64decode(segment).decode())
            expiry = datetime.datetime.fromtimestamp(parsed['exp'])
        except (AttributeError, IndexError, KeyError, TypeError, ValueError):
            return False
        return datetime.datetime.now() <= expiry

    async def get_tokens(self):
        """Exchange the session token for a fresh auth token.

        On success stores the new auth token, its expiry and the rotated
        session token, and marks the client ready; on failure only logs the
        error returned by the relay.
        """
        await asyncio.sleep(1)
        data = await self.socket.call('getSession', self.session_token)

        if 'error' in data:
            print(f'Error getting session: {data["error"]}')
        else:
            self.auth = data['auth']
            self.expires = datetime.datetime.strptime(data['expires'], '%Y-%m-%dT%H:%M:%S.%fZ')
            self.session_token = data['sessionToken']
            self.ready = True
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ neon-tts-plugin-coqui==0.7.0
2
+ openai
3
+ PyGPT