Spaces:
Runtime error
Runtime error
streaming chat into speech
Browse files- debug.py +8 -10
- speech_service.py +5 -18
- streaming_chat_service.py +71 -0
debug.py
CHANGED
@@ -4,7 +4,7 @@ from dotenv import load_dotenv
|
|
4 |
from speech_service import SpeechService
|
5 |
from concurrent.futures import ThreadPoolExecutor
|
6 |
from audio_stream_processor import AudioStreamProcessor
|
7 |
-
|
8 |
|
9 |
def run_debug_code():
|
10 |
load_dotenv()
|
@@ -14,11 +14,11 @@ def run_debug_code():
|
|
14 |
# print ("CLIP success")
|
15 |
|
16 |
print ("Initializing Chat")
|
17 |
-
chat_service = ChatService()
|
|
|
|
|
18 |
|
19 |
user_speech_service = SpeechService(voice_id="Adam")
|
20 |
-
ai_speech_service = SpeechService(voice_id="2OviOUQc1JsQRQgNkVBj") # Chales003
|
21 |
-
processor = AudioStreamProcessor()
|
22 |
|
23 |
# user_speech_service.print_voices() # if you want to see your custom voices
|
24 |
|
@@ -32,15 +32,13 @@ def run_debug_code():
|
|
32 |
print ("")
|
33 |
print (f'prompt: "{prompt}"')
|
34 |
stream = user_speech_service.stream(prompt)
|
35 |
-
|
36 |
|
37 |
-
response = chat_service.chat(prompt)
|
38 |
print ("")
|
39 |
-
print (f'response:
|
40 |
-
|
41 |
-
processor.add_audio_stream(stream)
|
42 |
|
43 |
-
|
44 |
print ("Chat success")
|
45 |
|
46 |
|
|
|
4 |
from speech_service import SpeechService
|
5 |
from concurrent.futures import ThreadPoolExecutor
|
6 |
from audio_stream_processor import AudioStreamProcessor
|
7 |
+
from streaming_chat_service import StreamingChatService
|
8 |
|
9 |
def run_debug_code():
|
10 |
load_dotenv()
|
|
|
14 |
# print ("CLIP success")
|
15 |
|
16 |
print ("Initializing Chat")
|
17 |
+
# chat_service = ChatService()
|
18 |
+
audio_processor = AudioStreamProcessor()
|
19 |
+
chat_service = StreamingChatService(audio_processor, voice_id="2OviOUQc1JsQRQgNkVBj") # Chales003
|
20 |
|
21 |
user_speech_service = SpeechService(voice_id="Adam")
|
|
|
|
|
22 |
|
23 |
# user_speech_service.print_voices() # if you want to see your custom voices
|
24 |
|
|
|
32 |
print ("")
|
33 |
print (f'prompt: "{prompt}"')
|
34 |
stream = user_speech_service.stream(prompt)
|
35 |
+
audio_processor.add_audio_stream(stream)
|
36 |
|
|
|
37 |
print ("")
|
38 |
+
print (f'response:')
|
39 |
+
response = chat_service.respond_to(prompt)
|
|
|
40 |
|
41 |
+
audio_processor.close()
|
42 |
print ("Chat success")
|
43 |
|
44 |
|
speech_service.py
CHANGED
@@ -25,24 +25,11 @@ class SpeechService:
|
|
25 |
print (voice)
|
26 |
|
27 |
def speak(self, prompt):
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
# play(audio)
|
34 |
-
audio_stream = generate(
|
35 |
-
text=prompt,
|
36 |
-
voice=self._voice_id,
|
37 |
-
model=self._model_id,
|
38 |
-
stream=True
|
39 |
-
)
|
40 |
-
# stream(audio_stream)
|
41 |
-
audio = b""
|
42 |
-
for chunk in audio_stream:
|
43 |
-
if chunk is not None:
|
44 |
-
audio += chunk
|
45 |
-
# play(chunk)
|
46 |
play(audio)
|
47 |
return
|
48 |
|
|
|
25 |
print (voice)
|
26 |
|
27 |
def speak(self, prompt):
    """Synthesize *prompt* with the configured voice and model, then play it.

    Blocking: returns only after playback has been handed to `play`.
    """
    rendered_audio = generate(
        model=self._model_id,
        voice=self._voice_id,
        text=prompt,
    )
    play(rendered_audio)
    return
|
35 |
|
streaming_chat_service.py
ADDED
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import torch
|
4 |
+
import openai
|
5 |
+
|
6 |
+
from audio_stream_processor import AudioStreamProcessor
|
7 |
+
from speech_service import SpeechService
|
8 |
+
|
9 |
+
|
10 |
+
class StreamingChatService:
    """Stream an OpenAI chat completion, speaking each completed sentence
    through a SpeechService/AudioStreamProcessor while the rest of the
    response is still being generated.

    Conversation history is kept in ``self._messages`` (OpenAI message-dict
    format) so successive :meth:`respond_to` calls are context-aware.
    """

    def __init__(self, audio_processor: "AudioStreamProcessor | None" = None,
                 api="openai", model_id="gpt-3.5-turbo", voice_id="Bella"):
        """Set up TTS and the OpenAI client.

        audio_processor: sink that consumes audio streams (``add_audio_stream``).
            (Fixed: the annotation previously read ``AudioStreamProcessor()``,
            which *instantiated* a processor at function-definition time —
            annotations are evaluated eagerly; a quoted type has no side effect.)
        api: backend selector; only "openai" is implemented here.
        model_id: OpenAI chat model name.
        voice_id: voice passed through to SpeechService.
        """
        self._audio_processor = audio_processor
        self._speech_service = SpeechService(voice_id=voice_id)
        self._api = api
        # Device string is computed but not used in this class yet — kept for parity.
        self._device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self._system_prompt = None

        openai.api_key = os.getenv("OPENAI_API_KEY")
        self._model_id = model_id
        self.reset()

    def reset(self):
        """Clear conversation history, re-seeding the system prompt if one is set."""
        self._messages = []
        if self._system_prompt:
            self._messages.append({"role": "system", "content": self._system_prompt})

    def _should_we_send_to_voice(self, sentence):
        """Return the speakable prefix of *sentence* — everything up to and
        including the last '.', '?' or '!' — or False if nothing is ready.

        A buffer that ends exactly on a terminator is deliberately held back
        until at least one more character arrives, so a trailing '.' (e.g. an
        abbreviation still being streamed) is not spoken prematurely; the
        caller flushes any remainder after the stream ends.
        """
        terminators = [".", "?", "!"]
        if not any(c in sentence for c in terminators):
            return False
        if sentence[-1] in terminators:
            # Terminator is the final character — wait for more text.
            return False
        last_terminator_index = max(sentence.rfind(c) for c in terminators)
        return sentence[:last_terminator_index + 1]

    def respond_to(self, prompt):
        """Stream the model's reply to *prompt*, queueing audio for each
        finished sentence as it arrives, and return the full reply text.

        Side effects: appends the user prompt and assistant reply to the
        conversation history; pushes TTS streams into the audio processor.
        """
        self._messages.append({"role": "user", "content": prompt})
        agent_response = ""
        current_sentence = ""

        response = openai.ChatCompletion.create(
            model=self._model_id,
            messages=self._messages,
            # NOTE(review): for deterministic debugging output use 0.0 —
            # the previous comment wrongly claimed 1.0 was deterministic.
            temperature=1.0,
            stream=True
        )

        for chunk in response:
            chunk_message = chunk['choices'][0]['delta']
            if 'content' in chunk_message:
                chunk_text = chunk_message['content']
                # NOTE(review): prints one line per token chunk; pass end=""
                # if contiguous console output is wanted.
                print(chunk_text)
                current_sentence += chunk_text
                agent_response += chunk_text
                text_to_speak = self._should_we_send_to_voice(current_sentence)
                if text_to_speak:
                    stream = self._speech_service.stream(text_to_speak)
                    self._audio_processor.add_audio_stream(stream)
                    # Keep whatever followed the last terminator for the next round.
                    current_sentence = current_sentence[len(text_to_speak):]

        # Flush any trailing partial sentence once the model stream ends.
        if len(current_sentence) > 0:
            stream = self._speech_service.stream(current_sentence)
            self._audio_processor.add_audio_stream(stream)
        self._messages.append({"role": "assistant", "content": agent_response})
        return agent_response
|