Spaces:
No application file
No application file
rayyanreda
committed on
Commit
·
a282198
1
Parent(s):
8f5d03e
Delete audio-chatbot
Browse files- audio-chatbot/app.py +0 -190
- audio-chatbot/requirements.txt +0 -7
audio-chatbot/app.py
DELETED
@@ -1,190 +0,0 @@
|
|
1 |
-
from base64 import b64encode
|
2 |
-
from io import BytesIO
|
3 |
-
|
4 |
-
from gradio import Audio, Interface, Textbox
|
5 |
-
from gtts import gTTS
|
6 |
-
from mtranslate import translate
|
7 |
-
from speech_recognition import AudioFile, Recognizer
|
8 |
-
from transformers import (BlenderbotSmallForConditionalGeneration,
|
9 |
-
BlenderbotSmallTokenizer)
|
10 |
-
|
11 |
-
|
12 |
-
# Speech to text function
|
13 |
-
def stt(audio: object, language: str) -> str:
    """Transcribe a recorded utterance into text.

    Args:
        audio: record of user speech, in a form accepted by ``AudioFile``
        language (str): language code forwarded to the recognizer
    Returns:
        str: recognized speech of user
    """
    recognizer = Recognizer()
    # Load the whole recording into memory before transcribing it.
    with AudioFile(audio) as audio_source:
        recording = recognizer.record(audio_source)
    # Transcription is delegated to Google's speech-to-text API.
    return recognizer.recognize_google(recording, language=language)
|
30 |
-
|
31 |
-
def to_en_translation(text: str, language: str) -> str:
    """Translate text from the specified language into English.

    Args:
        text (str): input text
        language (str): language the input text is written in
    Returns:
        str: text translated to English
    """
    target_language = "en"
    return translate(text, target_language, language)
|
40 |
-
|
41 |
-
|
42 |
-
# Translating from English to the desired language
|
43 |
-
def from_en_translation(text: str, language: str) -> str:
    """Translate English text into the specified language.

    Args:
        text (str): input text in English
        language (str): language to translate the text into
    Returns:
        str: translated text
    """
    source_language = "en"
    return translate(text, language, source_language)
|
52 |
-
|
53 |
-
|
54 |
-
class TextGenerationPipeline:
    """Pipeline for text generation with the BlenderbotSmall model.

    The tokenizer and model are class attributes, loaded once when the
    class body runs and shared by every instance. Calling an instance
    with a text returns the generated text as a string.
    """

    # Load the tokenizer and the model once, at class-definition time.
    model_name = "facebook/blenderbot_small-90M"
    tokenizer = BlenderbotSmallTokenizer.from_pretrained(model_name)
    model = BlenderbotSmallForConditionalGeneration.from_pretrained(model_name)

    def __init__(self, **kwargs):
        """Store the text generation parameters.

        For example: max_length=100 which generates text shorter than
        100 tokens. Visit:
        https://huggingface.co/docs/transformers/main_classes/text_generation
        for more parameters.

        Args:
            **kwargs: generation parameters; every one of them is later
                forwarded to ``model.generate`` by ``__call__``
        """
        # Parameters live directly in the instance dict, which __call__
        # unpacks as keyword arguments for generate().
        self.__dict__.update(kwargs)

    def preprocess(self, text: str):
        """Tokenize the input text.

        Args:
            text (str): user specified text
        Returns:
            tokenizer output holding the text as PyTorch tensors
            (``return_tensors="pt"``); it is not a plain string
        """
        return self.tokenizer(text, return_tensors="pt")

    def postprocess(self, outputs) -> str:
        """Convert generated token ids back into text.

        Args:
            outputs: model text generation output; only the first
                sequence (``outputs[0]``) is decoded
        Returns:
            str: generated text with special tokens removed
        """
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)

    def __call__(self, text: str) -> str:
        """Generate text from input text.

        Args:
            text (str): user specified text
        Returns:
            str: generated text
        """
        model_inputs = self.preprocess(text)
        # vars(self) is exactly the set of kwargs given to __init__.
        generated = self.model.generate(**model_inputs, **vars(self))
        return self.postprocess(generated)
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
def tts(text: str, language: str) -> object:
    """Build a text-to-speech object for the given text.

    Args:
        text (str): generated answer of bot
        language (str): language code passed to gTTS as ``lang``
    Returns:
        object: text to speech object
    """
    speech = gTTS(text=text, lang=language, slow=False)
    return speech
|
113 |
-
|
114 |
-
def tts_to_bytesio(tts_object: object) -> bytes:
    """Render a tts object into raw audio bytes.

    Args:
        tts_object (object): audio object obtained from gtts; it must
            provide ``write_to_fp(file_like)``
    Returns:
        bytes: audio bytes written by ``tts_object``
    """
    # The context manager releases the in-memory buffer once the bytes
    # have been copied out.
    with BytesIO() as buffer:
        tts_object.write_to_fp(buffer)
        # getvalue() returns the entire buffer regardless of the current
        # stream position, so no seek(0) is needed first.
        return buffer.getvalue()
|
125 |
-
|
126 |
-
|
127 |
-
def html_audio_autoplay(bytes: bytes, mime_type: str = "audio/mpeg") -> str:
    """Create an html snippet that autoplays audio in the gradio app.

    The audio is embedded as a base64 data URI inside an ``<audio>``
    element with the ``controls`` and ``autoplay`` attributes.

    Args:
        bytes (bytes): audio bytes
        mime_type (str): MIME type declared for the audio. Defaults to
            ``audio/mpeg`` because the audio passed in by this app is
            produced by gTTS, which emits MP3 data, not WAV.
    Returns:
        str: html markup that provides audio autoplaying
    """
    encoded_audio = b64encode(bytes).decode()
    return f"""
    <audio controls autoplay>
    <source src="data:{mime_type};base64,{encoded_audio}" type="{mime_type}">
    </audio>
    """
|
141 |
-
|
142 |
-
# Settings shared by the app: maximum length of a generated answer, the
# language used for recognition/translation/speech, and the text
# generation pipeline itself.
max_answer_length = 100
desired_language = "de"
response_generator_pipe = TextGenerationPipeline(
    max_length=max_answer_length,
)
|
145 |
-
|
146 |
-
|
147 |
-
def main(audio: object):
    """Run the full voice chatbot round trip for the gradio app.

    It responds both verbally and in text
    by taking voice input from user.

    Args:
        audio (object): recorded speech of user, or None when there is
            no recording

    Returns:
        tuple containing

        - user_speech_text (str) : recognized speech
        - bot_response_de (str) : translated answer of bot
        - bot_response_en (str) : bot's original answer
        - html (object) : autoplayer for bot's speech

        All four values are empty strings when ``audio`` is None.
    """
    # NOTE(review): with live=True the callback may presumably be invoked
    # without a recording (e.g. after the input is cleared); there is
    # nothing to transcribe then, so return empty outputs instead of
    # failing inside stt().
    if audio is None:
        return "", "", "", ""

    user_speech_text = stt(audio, desired_language)
    translated_text = to_en_translation(user_speech_text, desired_language)
    bot_response_en = response_generator_pipe(translated_text)
    bot_response_de = from_en_translation(bot_response_en, desired_language)
    bot_voice = tts(bot_response_de, desired_language)
    bot_voice_bytes = tts_to_bytesio(bot_voice)
    html = html_audio_autoplay(bot_voice_bytes)
    return user_speech_text, bot_response_de, bot_response_en, html
|
172 |
-
|
173 |
-
|
174 |
-
# Build the gradio interface around main() and start it: one microphone
# input, three text outputs and one html output for the audio player.
microphone_input = Audio(
    source="microphone",
    type="filepath",
)
output_components = [
    Textbox(label="You said: "),
    Textbox(label="AI said: "),
    Textbox(label="AI said (English): "),
    "html",
]
demo = Interface(
    fn=main,
    inputs=[microphone_input],
    outputs=output_components,
    live=True,
    allow_flagging="never",
)
demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
audio-chatbot/requirements.txt
DELETED
@@ -1,7 +0,0 @@
|
|
1 |
-
transformers==4.25.1
|
2 |
-
--find-links https://download.pytorch.org/whl/torch_stable.html
|
3 |
-
torch==1.13.1+cpu
|
4 |
-
gradio==3.14.0
|
5 |
-
SpeechRecognition==3.9.0
|
6 |
-
mtranslate==1.8
|
7 |
-
gTTS==2.3.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|