sachin committed on
Commit
5a68da9
·
1 Parent(s): d83c996

add speech inference

Browse files
Files changed (4) hide show
  1. recipes/urls.py +7 -1
  2. recipes/views.py +259 -9
  3. requirements.txt +5 -0
  4. spaces/urls.py +1 -1
recipes/urls.py CHANGED
@@ -1,6 +1,6 @@
1
  from django.urls import path
2
  from .views import recipe_generate_route
3
- from .views import VisionLLMView, NIMVisionLLMView, TextLLMView, TranslateLLMView
4
 
5
  urlpatterns = [
6
  path('recipe_generate/', recipe_generate_route, name='recipe_generate'),
@@ -8,4 +8,10 @@ urlpatterns = [
8
  path('nim_vision_llm_url/', NIMVisionLLMView.as_view()),
9
  path('text_llm_url/', TextLLMView.as_view()),
10
  path('translate_llm_url/', TranslateLLMView.as_view()),
 
 
 
 
 
 
11
  ]
 
1
  from django.urls import path
2
  from .views import recipe_generate_route
3
+ from .views import VisionLLMView, NIMVisionLLMView, TextLLMView, TranslateLLMView, SpeechLLMView, LlamaVisionView, IndicLLMView, TTSView, SpeechASRView, SpeechToSpeechView
4
 
5
  urlpatterns = [
6
  path('recipe_generate/', recipe_generate_route, name='recipe_generate'),
 
8
  path('nim_vision_llm_url/', NIMVisionLLMView.as_view()),
9
  path('text_llm_url/', TextLLMView.as_view()),
10
  path('translate_llm_url/', TranslateLLMView.as_view()),
11
+ path('speech_llm_url/', SpeechLLMView.as_view()),
12
+ path('speech_asr_url/', SpeechASRView.as_view()),
13
+ path('llama_vision_url/', LlamaVisionView.as_view()),
14
+ path('indic_llm_url/', IndicLLMView.as_view()),
15
+ path('tts_url/', TTSView.as_view()),
16
+ path('speech_to_speech_url/', SpeechToSpeechView.as_view()),
17
  ]
recipes/views.py CHANGED
@@ -9,6 +9,175 @@ import os
9
  import base64
10
  import json
11
  import requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  class TranslateLLMView(APIView):
14
  def post(self, request, format=None):
@@ -49,11 +218,10 @@ class TextLLMView(APIView):
49
  def post(self, request, format=None):
50
  try:
51
  data = request.data
52
- api_key = os.environ["MISTRAL_API_KEY"]
53
 
54
- # Initialize the Mistral client
55
- client = Mistral(api_key=api_key)
56
 
 
57
  prompt = data['messages'][0]['prompt']
58
  # Specify model
59
  #model = "pixtral-12b-2409"
@@ -71,13 +239,23 @@ class TextLLMView(APIView):
71
  }
72
  ]
73
 
74
- # Get the chat response
75
- chat_response = client.chat.complete(
76
- model=model,
77
- messages=messages
78
- )
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- content = chat_response.choices[0].message.content
81
  #print(chat_response.choices[0].message.content)
82
  # Return the content of the response
83
  return Response({"response": content})
@@ -85,6 +263,40 @@ class TextLLMView(APIView):
85
  print(f"An error occurred: {e}")
86
  return Response({'error': 'Something went wrong'}, status=500)
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  @api_view(['GET'])
90
  def recipe_generate_route(request):
@@ -103,6 +315,44 @@ def recipe_generate_route(request):
103
  return Response(result)
104
 
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  class VisionLLMView(APIView):
108
  def post(self, request, format=None):
 
9
  import base64
10
  import json
11
  import requests
12
+ from openai import OpenAI
13
+ from ollama import Client
14
+ from django.http import FileResponse
15
+ import io
16
+
17
+ class TTSView(APIView):
18
+ def post(self, request, format=None):
19
+ # Define the API endpoint
20
+ # Define the URL for the TTS API
21
+ url = 'http://localhost:5002/api/tts'
22
+
23
+ # Define the multiline text
24
+ text = "This is the first line"
25
+
26
+ # Prepare the parameters for the GET request
27
+ params = {
28
+ 'text': text
29
+ }
30
+
31
+ # Make the GET request
32
+ response = requests.get(url, params=params)
33
+
34
+ # Check if the request was successful
35
+ if response.status_code == 200:
36
+ # Save the audio response as a WAV file
37
+ # Create a file-like object with the audio data
38
+ audio_data = io.BytesIO(response.content)
39
+
40
+ # Return the audio file as a response
41
+ return FileResponse(audio_data, as_attachment=True, filename='audio_output.wav')
42
+ else:
43
+ return Response({"error": "Failed to synthesize speech"}, status=response.status_code)
44
+
45
+ class SpeechASRView(APIView):
46
+ def post(self, request, format=None):
47
+ try:
48
+ data = request.data
49
+ ##prompt = data['prompt']
50
+ audio = data['audio']
51
+
52
+ client = OpenAI(api_key="cant-be-empty", base_url="http://localhost:11800/v1/")
53
+
54
+ #filename= '/home/gaganyatri/Music/test1.flac'
55
+ audio_bytes = audio.read()
56
+
57
+ #audio_file = open(filename, "rb")
58
+
59
+ transcript = client.audio.transcriptions.create(
60
+ model="Systran/faster-distil-whisper-small.en", file=audio_bytes
61
+ )
62
+
63
+ #print(transcript.text)
64
+ voice_content = transcript.text
65
+ return Response({"response": voice_content})
66
+ except Exception as e:
67
+ print(f"An error occurred: {e}")
68
+ return Response({'error': 'Something went wrong'}, status=500)
69
+
70
+
71
+ class SpeechToSpeechView(APIView):
72
+ def post(self, request, format=None):
73
+ try:
74
+ data = request.data
75
+ ##prompt = data['prompt']
76
+ audio = data['audio']
77
+
78
+ client = OpenAI(api_key="cant-be-empty", base_url="http://localhost:11800/v1/")
79
+
80
+ #filename= '/home/gaganyatri/Music/test1.flac'
81
+ audio_bytes = audio.read()
82
+
83
+ #audio_file = open(filename, "rb")
84
+
85
+ transcript = client.audio.transcriptions.create(
86
+ model="Systran/faster-distil-whisper-small.en", file=audio_bytes
87
+ )
88
+
89
+ #print(transcript.text)
90
+ voice_content = transcript.text
91
+ #content = 'audio received'
92
+ system_prompt = "Please summarize the following prompt into a concise and clear statement:"
93
+
94
+
95
+ model = "mistral-nemo:latest"
96
+ client = Client(host='http://localhost:11434')
97
+ response = client.chat(
98
+ model=model,
99
+ messages=[
100
+ {
101
+ "role": "system",
102
+ "content": system_prompt
103
+ },
104
+ {
105
+ "role": "user",
106
+ "content": voice_content,
107
+ }
108
+ ],
109
+ )
110
+
111
+ # Extract the model's response about the image
112
+ response_text = response['message']['content'].strip()
113
+
114
+ url = 'http://localhost:5002/api/tts'
115
+
116
+ # Define the multiline text
117
+ #text = "This is the first line"
118
+
119
+ # Prepare the parameters for the GET request
120
+ params = {
121
+ 'text': response_text
122
+ }
123
+
124
+ # Make the GET request
125
+ response = requests.get(url, params=params)
126
+
127
+ # Check if the request was successful
128
+ if response.status_code == 200:
129
+ # Save the audio response as a WAV file
130
+ # Create a file-like object with the audio data
131
+ audio_data = io.BytesIO(response.content)
132
+
133
+ # Return the audio file as a response
134
+ return FileResponse(audio_data, as_attachment=True, filename='audio_output.wav')
135
+ else:
136
+ return Response({"error": "Failed to synthesize speech"}, status=response.status_code)
137
+
138
+ except Exception as e:
139
+ print(f"An error occurred: {e}")
140
+ return Response({'error': 'Something went wrong'}, status=500)
141
+
142
+ class SpeechLLMView(APIView):
143
+ def post(self, request, format=None):
144
+ try:
145
+ data = request.data
146
+ ##prompt = data['prompt']
147
+ audio = data['audio']
148
+
149
+ client = OpenAI(api_key="cant-be-empty", base_url="http://localhost:11800/v1/")
150
+
151
+ #filename= '/home/gaganyatri/Music/test1.flac'
152
+ audio_bytes = audio.read()
153
+
154
+ #audio_file = open(filename, "rb")
155
+
156
+ transcript = client.audio.transcriptions.create(
157
+ model="Systran/faster-distil-whisper-small.en", file=audio_bytes
158
+ )
159
+
160
+ #print(transcript.text)
161
+ voice_content = transcript.text
162
+ #content = 'audio received'
163
+
164
+ model = "mistral-nemo:latest"
165
+ client = Client(host='http://localhost:11434')
166
+ response = client.chat(
167
+ model=model,
168
+ messages=[{
169
+ "role": "user",
170
+ "content": voice_content,
171
+ }],
172
+ )
173
+
174
+ # Extract the model's response about the image
175
+ response_text = response['message']['content'].strip()
176
+
177
+ return Response({"response": response_text})
178
+ except Exception as e:
179
+ print(f"An error occurred: {e}")
180
+ return Response({'error': 'Something went wrong'}, status=500)
181
 
182
  class TranslateLLMView(APIView):
183
  def post(self, request, format=None):
 
218
  def post(self, request, format=None):
219
  try:
220
  data = request.data
 
221
 
222
+ isOnline = data['isOnline']
 
223
 
224
+ print(isOnline)
225
  prompt = data['messages'][0]['prompt']
226
  # Specify model
227
  #model = "pixtral-12b-2409"
 
239
  }
240
  ]
241
 
242
+ if(isOnline):
243
+ api_key = os.environ["MISTRAL_API_KEY"]
244
+
245
+ # Initialize the Mistral client
246
+ client = Mistral(api_key=api_key)
247
+
248
+
249
+ # Get the chat response
250
+ chat_response = client.chat.complete(
251
+ model=model,
252
+ messages=messages
253
+ )
254
+
255
+ content = chat_response.choices[0].message.content
256
+ else:
257
+ content = "helloWorld"
258
 
 
259
  #print(chat_response.choices[0].message.content)
260
  # Return the content of the response
261
  return Response({"response": content})
 
263
  print(f"An error occurred: {e}")
264
  return Response({'error': 'Something went wrong'}, status=500)
265
 
266
+ class IndicLLMView(APIView):
267
+ def post(self, request, format=None):
268
+ try:
269
+ data = request.data
270
+
271
+ isOnline = data['isOnline']
272
+
273
+ print(isOnline)
274
+ prompt = data['messages'][0]['prompt']
275
+ # Specify model
276
+ #model = "pixtral-12b-2409"
277
+ model = data['model']
278
+ # Define the messages for the chat
279
+
280
+ client = Client(host='http://localhost:11434')
281
+ response = client.chat(
282
+ model=model,
283
+ messages=[{
284
+ "role": "user",
285
+ "content": prompt,
286
+ }],
287
+ )
288
+
289
+ # Extract the model's response about the image
290
+ response_text = response['message']['content'].strip()
291
+
292
+ #print(chat_response.choices[0].message.content)
293
+ # Return the content of the response
294
+ return Response({"response": response_text})
295
+ except Exception as e:
296
+ print(f"An error occurred: {e}")
297
+ return Response({'error': 'Something went wrong'}, status=500)
298
+
299
+
300
 
301
  @api_view(['GET'])
302
  def recipe_generate_route(request):
 
315
  return Response(result)
316
 
317
 
318
+ class LlamaVisionView(APIView):
319
+ def post(self, request, format=None):
320
+ try:
321
+ data = request.data
322
+
323
+ image_data = (data['messages'][0]['image'][0])
324
+ prompt = data['messages'][0]['prompt']
325
+ # Specify model
326
+ #model = "pixtral-12b-2409"
327
+ model = data['model']
328
+ # Define the messages for the chat
329
+
330
+ # Define the messages for the chat
331
+
332
+ client = Client(host='http://localhost:21434')
333
+ response = client.chat(
334
+ model="x/llama3.2-vision:latest",
335
+ messages=[{
336
+ "role": "user",
337
+ "content": prompt,
338
+ "images": [image_data]
339
+ }],
340
+ )
341
+
342
+ # Extract the model's response about the image
343
+ response_text = response['message']['content'].strip()
344
+
345
+ print(response_text)
346
+ content = response_text
347
+
348
+
349
+ #print(chat_response.choices[0].message.content)
350
+ # Return the content of the response
351
+ return Response({"response": content})
352
+ except Exception as e:
353
+ print(f"An error occurred: {e}")
354
+ return Response({'error': 'Something went wrong'}, status=500)
355
+
356
 
357
  class VisionLLMView(APIView):
358
  def post(self, request, format=None):
requirements.txt CHANGED
@@ -7,6 +7,7 @@ charset-normalizer==3.3.2
7
  click==8.1.7
8
  cryptography==43.0.1
9
  defusedxml==0.8.0rc2
 
10
  Django==5.1.1
11
  django-cors-headers==4.4.0
12
  django-filter==24.3
@@ -24,12 +25,15 @@ httpcore==1.0.5
24
  httpx==0.27.2
25
  idna==3.10
26
  inflection==0.5.1
 
27
  jsonpath-python==1.0.6
28
  Markdown==3.7
29
  mistralai==1.1.0
30
  mypy-extensions==1.0.0
31
  numpy==1.26.4
32
  oauthlib==3.2.2
 
 
33
  packaging==24.1
34
  pandas==2.2.3
35
  pycparser==2.22
@@ -47,6 +51,7 @@ sniffio==1.3.1
47
  social-auth-app-django==5.4.2
48
  social-auth-core==4.5.4
49
  sqlparse==0.5.1
 
50
  typing-inspect==0.9.0
51
  typing_extensions==4.12.2
52
  tzdata==2024.2
 
7
  click==8.1.7
8
  cryptography==43.0.1
9
  defusedxml==0.8.0rc2
10
+ distro==1.9.0
11
  Django==5.1.1
12
  django-cors-headers==4.4.0
13
  django-filter==24.3
 
25
  httpx==0.27.2
26
  idna==3.10
27
  inflection==0.5.1
28
+ jiter==0.6.1
29
  jsonpath-python==1.0.6
30
  Markdown==3.7
31
  mistralai==1.1.0
32
  mypy-extensions==1.0.0
33
  numpy==1.26.4
34
  oauthlib==3.2.2
35
+ ollama==0.3.3
36
+ openai==1.51.2
37
  packaging==24.1
38
  pandas==2.2.3
39
  pycparser==2.22
 
51
  social-auth-app-django==5.4.2
52
  social-auth-core==4.5.4
53
  sqlparse==0.5.1
54
+ tqdm==4.66.5
55
  typing-inspect==0.9.0
56
  typing_extensions==4.12.2
57
  tzdata==2024.2
spaces/urls.py CHANGED
@@ -26,7 +26,7 @@ schema_view = get_schema_view(
26
  default_version='v1',
27
  description="API for Space Operations",
28
  terms_of_service="https://www.gaganyatri.in/",
29
- contact=openapi.Contact(email="contact@yourapp.com"),
30
  license=openapi.License(name="MIT License"),
31
  ),
32
  public=True,
 
26
  default_version='v1',
27
  description="API for Space Operations",
28
  terms_of_service="https://www.gaganyatri.in/",
29
+ contact=openapi.Contact(email="info@slabstech.com"),
30
  license=openapi.License(name="MIT License"),
31
  ),
32
  public=True,