remzicam committed on
Commit 2be0e9c · 1 Parent(s): 2ac7ca4

Upload 4 files

Files changed (4)
  1. German_AI_Voicebot.png +0 -0
  2. app.py +122 -0
  3. blender_model.py +207 -0
  4. requirements.txt +0 -0
German_AI_Voicebot.png ADDED
app.py ADDED
@@ -0,0 +1,122 @@
+ from transformers import (BlenderbotSmallTokenizer,
+                           logging)
+ from mtranslate import translate
+ from io import BytesIO
+ from base64 import b64encode
+ import gradio as gr
+ from speech_recognition import Recognizer, AudioFile
+ from gtts import gTTS
+ from blender_model import blender_onnx_model
+
+ # suppress huggingface warnings
+ logging.set_verbosity_error()
+ bot_tokenizer_name = "facebook/blenderbot_small-90M"
+ max_answer_length = 100
+ bot_language = "en"
+ main_language = "de"
+ bot_tokenizer = BlenderbotSmallTokenizer.from_pretrained(bot_tokenizer_name)
+ # load the chatbot model (quantized ONNX export, see blender_model.py)
+ bot_model = blender_onnx_model
+
+ def app(audio):
+     """Takes voice input from the user, then responds
+     both verbally and in text.
+     """
+     text = stt(audio)
+     bot_response_en, bot_response_de = answer_generation(text)
+     voice_bot = tts(bot_response_de)
+     b64 = b64encode(voice_bot).decode()
+     # HTML snippet that plays the sound automatically (gTTS outputs MP3)
+     html = f"""
+     <audio controls autoplay>
+       <source src="data:audio/mpeg;base64,{b64}" type="audio/mpeg">
+     </audio>
+     """
+     return text, html, bot_response_de, bot_response_en
+
+ def stt(audio):
+     """Speech-to-text converter.
+
+     Args:
+         audio: recording of the user's speech
+
+     Returns:
+         text (str): recognized speech of the user
+     """
+     r = Recognizer()
+     # open the file
+     with AudioFile(audio) as source:
+         # listen for the data (load audio to memory)
+         audio_data = r.record(source)
+     # recognize (convert from speech to text)
+     text = r.recognize_google(audio_data,
+                               language=main_language)
+     return text
+
+ def answer_generation(user_input_de: str):
+     """Takes user input as German text and translates it into English,
+     since Blenderbot works only in English. The model then generates an
+     answer from the English version of the input. Finally, the bot's
+     response is translated back into German.
+
+     Args:
+         user_input_de (str): text version of the user's speech
+
+     Returns:
+         bot_response_en (str): bot's response in English
+         bot_response_de (str): bot's response in German
+     """
+     # de-en translation
+     user_input_en = translate(user_input_de,
+                               bot_language,
+                               main_language)
+     inputs = bot_tokenizer(user_input_en,
+                            return_tensors="pt")
+     generation = bot_model.generate(**inputs,
+                                     max_length=max_answer_length)
+     bot_response_en = bot_tokenizer.decode(generation[0],
+                                            skip_special_tokens=True)
+     # en-de translation
+     bot_response_de = translate(bot_response_en,
+                                 main_language,
+                                 bot_language)
+
+     return bot_response_en, bot_response_de
+
+ def tts(text: str):
+     """Converts text into audio bytes.
+
+     Args:
+         text (str): generated answer of the bot
+
+     Returns:
+         bytes: audio in a format suitable for the HTML autoplay option
+     """
+     tts = gTTS(text=text,
+                lang=main_language,
+                slow=False)
+     bytes_object = BytesIO()
+     tts.write_to_fp(bytes_object)
+     bytes_object.seek(0)
+     return bytes_object.getvalue()
+
+ logo_image_path = "German_AI_Voicebot.png"
+ logo = f"<center><img src='file/{logo_image_path}' width=180px></center>"
+ gr.Interface(
+     fn=app,
+     inputs=[
+         gr.Audio(source="microphone", type="filepath"),
+     ],
+     outputs=[
+         gr.Textbox(label="You said: ").style(css="{color: red}"),
+         "html",
+         gr.Textbox(label="AI said: "),
+         gr.Textbox(label="AI said (English): "),
+     ],
+     live=True,
+     allow_flagging="never",
+     description=logo,
+ ).launch()
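
app.py chains speech recognition, round-trip translation (de→en→de via mtranslate), and Blenderbot generation behind a live Gradio interface. A minimal sketch of the text path alone, without the UI (assuming blender_model.py and its ONNX downloads are available; the German prompt is illustrative):

from transformers import BlenderbotSmallTokenizer
from mtranslate import translate
from blender_model import blender_onnx_model

tokenizer = BlenderbotSmallTokenizer.from_pretrained("facebook/blenderbot_small-90M")
user_input_de = "Wie ist das Wetter heute?"  # illustrative prompt
user_input_en = translate(user_input_de, "en", "de")  # de -> en
inputs = tokenizer(user_input_en, return_tensors="pt")
generation = blender_onnx_model.generate(**inputs, max_length=100)
bot_response_en = tokenizer.decode(generation[0], skip_special_tokens=True)
bot_response_de = translate(bot_response_en, "de", "en")  # en -> de
print(bot_response_de)
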
blender_model.py ADDED
@@ -0,0 +1,207 @@
+ from transformers import (
+     AutoConfig,
+     BlenderbotSmallForConditionalGeneration,
+     logging
+ )
+ from transformers.modeling_outputs import (
+     Seq2SeqLMOutput,
+     BaseModelOutput,
+ )
+ from huggingface_hub import hf_hub_url, cached_download
+ from onnxruntime import (GraphOptimizationLevel,
+                          InferenceSession,
+                          SessionOptions)
+
+ from torch import from_numpy
+ from torch.nn import Module
+ from functools import reduce
+ from operator import iconcat
+
+ # suppress huggingface warnings
+ logging.set_verbosity_error()
+
+ model_vocab_size = 30000
+ model_card = "remzicam/xs_blenderbot_onnx"
+ model_file_names = ["blenderbot_small-90M-encoder-quantized.onnx",
+                     "blenderbot_small-90M-decoder-quantized.onnx",
+                     "blenderbot_small-90M-init-decoder-quantized.onnx"]
+
+ class BlenderEncoder(Module):
+     def __init__(self, encoder_sess):
+         super().__init__()
+         self.encoder = encoder_sess
+
+     def forward(
+         self,
+         input_ids,
+         attention_mask,
+         inputs_embeds=None,
+         head_mask=None,
+         output_attentions=None,
+         output_hidden_states=None,
+         return_dict=None,
+     ):
+         encoder_hidden_state = from_numpy(
+             self.encoder.run(
+                 None,
+                 {
+                     "input_ids": input_ids.cpu().numpy(),
+                     "attention_mask": attention_mask.cpu().numpy(),
+                 },
+             )[0]
+         )
+
+         return BaseModelOutput(encoder_hidden_state)
+
+
+ class BlenderDecoderInit(Module):
+     def __init__(self, decoder_sess):
+         super().__init__()
+         self.decoder = decoder_sess
+
+     def forward(self, input_ids, encoder_attention_mask, encoder_hidden_states):
+         decoder_outputs = self.decoder.run(
+             None,
+             {
+                 "input_ids": input_ids.cpu().numpy(),
+                 "encoder_attention_mask": encoder_attention_mask.cpu().numpy(),
+                 "encoder_hidden_states": encoder_hidden_states.cpu().numpy(),
+             },
+         )
+
+         list_pkv = tuple(from_numpy(x) for x in decoder_outputs[1:])
+
+         out_past_key_values = tuple(
+             list_pkv[i : i + 4] for i in range(0, len(list_pkv), 4)
+         )
+
+         return from_numpy(decoder_outputs[0]), out_past_key_values
+
+
+ class BlenderDecoder(Module):
+     def __init__(self, decoder_sess):
+         super().__init__()
+         self.decoder = decoder_sess
+
+     def forward(self, input_ids, attention_mask, encoder_output, past_key_values):
+         decoder_inputs = {
+             "input_ids": input_ids.cpu().numpy(),
+             "encoder_attention_mask": attention_mask.cpu().numpy(),
+         }
+
+         flat_past_key_values = reduce(iconcat, past_key_values, [])
+
+         past_key_values = {
+             f"pkv_{i}": pkv.cpu().numpy() for i, pkv in enumerate(flat_past_key_values)
+         }
+
+         decoder_outputs = self.decoder.run(None, {**decoder_inputs, **past_key_values})
+         # converts each value of the list to a tensor from numpy
+         list_pkv = tuple(from_numpy(x) for x in decoder_outputs[1:])
+
+         # creates a tuple of tuples of shape 6x4 from the above tuple
+         out_past_key_values = tuple(
+             list_pkv[i : i + 4] for i in range(0, len(list_pkv), 4)
+         )
+
+         return from_numpy(decoder_outputs[0]), out_past_key_values
+
+
+ class OnnxBlender(BlenderbotSmallForConditionalGeneration):
+     """Creates a Blender model from ONNX sessions (encoder, decoder & init_decoder)."""
+
+     def __init__(self, onnx_model_sessions):
+         config = AutoConfig.from_pretrained("facebook/blenderbot_small-90M")
+         config.vocab_size = model_vocab_size
+         super().__init__(config)
+
+         assert len(onnx_model_sessions) == 3, "all three models should be given"
+
+         encoder_sess, decoder_sess, decoder_sess_init = onnx_model_sessions
+
+         self.encoder = BlenderEncoder(encoder_sess)
+         self.decoder = BlenderDecoder(decoder_sess)
+         self.decoder_init = BlenderDecoderInit(decoder_sess_init)
+
+     def get_encoder(self):
+         return self.encoder
+
+     def get_decoder(self):
+         return self.decoder
+
+     def forward(
+         self,
+         input_ids=None,
+         attention_mask=None,
+         decoder_input_ids=None,
+         decoder_attention_mask=None,
+         head_mask=None,
+         decoder_head_mask=None,
+         cross_attn_head_mask=None,
+         encoder_outputs=None,
+         past_key_values=None,
+         inputs_embeds=None,
+         decoder_inputs_embeds=None,
+         labels=None,
+         use_cache=None,
+         output_attentions=None,
+         output_hidden_states=None,
+         return_dict=None,
+     ):
+         encoder_hidden_states = encoder_outputs[0]
+
+         if past_key_values is not None:
+             if decoder_input_ids is not None:
+                 decoder_input_ids = decoder_input_ids[:, -1:]
+             if decoder_inputs_embeds is not None:
+                 decoder_inputs_embeds = decoder_inputs_embeds[:, -1:]
+
+         if past_key_values is None:
+             # runs only for the first generation step
+             init_onnx_outputs = self.decoder_init(
+                 decoder_input_ids, attention_mask, encoder_hidden_states
+             )
+
+             logits, past_key_values = init_onnx_outputs
+
+         else:
+             onnx_outputs = self.decoder(
+                 decoder_input_ids,
+                 attention_mask,
+                 encoder_hidden_states,
+                 past_key_values,
+             )
+
+             logits, past_key_values = onnx_outputs
+
+         return Seq2SeqLMOutput(logits=logits, past_key_values=past_key_values)
+
+
+ class ModelLoad:
+     def __init__(self, model_card, file_names):
+         self.model_card = model_card
+         self.file_names = file_names
+
+     def model_file_downloader(self, model_card, filename):
+         config_file_url = hf_hub_url(model_card, filename)
+         model_file = cached_download(config_file_url)
+         return model_file
+
+     def inference_session(self, file_name):
+         model_file = self.model_file_downloader(self.model_card, file_name)
+         options = SessionOptions()
+         options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
+         return InferenceSession(model_file, options=options)
+
+     def __call__(self, model_config):
+         model = model_config([*map(self.inference_session,
+                                    self.file_names)])
+         return model
+
+ model_loader = ModelLoad(model_card, model_file_names)
+ blender_onnx_model = model_loader(OnnxBlender)
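
ModelLoad maps each ONNX file name to an onnxruntime InferenceSession and hands the three sessions to the model class in order (encoder, decoder, init-decoder). A minimal sketch of the same wiring done by hand, under the assumption that blender_model is importable:

from huggingface_hub import hf_hub_url, cached_download
from onnxruntime import InferenceSession
from blender_model import OnnxBlender, model_card, model_file_names

# order matters: encoder, decoder, init-decoder
sessions = [InferenceSession(cached_download(hf_hub_url(model_card, name)))
            for name in model_file_names]
model = OnnxBlender(sessions)  # usable like BlenderbotSmallForConditionalGeneration
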
requirements.txt ADDED
Binary file (426 Bytes).