sachin committed on
Commit
5a68da9
·
1 Parent(s): d83c996

add speech inference

Browse files
Files changed (4) hide show
  1. recipes/urls.py +7 -1
  2. recipes/views.py +259 -9
  3. requirements.txt +5 -0
  4. spaces/urls.py +1 -1
recipes/urls.py CHANGED
@@ -1,6 +1,6 @@
1
  from django.urls import path
2
  from .views import recipe_generate_route
3
- from .views import VisionLLMView, NIMVisionLLMView, TextLLMView, TranslateLLMView
4
 
5
  urlpatterns = [
6
  path('recipe_generate/', recipe_generate_route, name='recipe_generate'),
@@ -8,4 +8,10 @@ urlpatterns = [
8
  path('nim_vision_llm_url/', NIMVisionLLMView.as_view()),
9
  path('text_llm_url/', TextLLMView.as_view()),
10
  path('translate_llm_url/', TranslateLLMView.as_view()),
 
 
 
 
 
 
11
  ]
 
1
  from django.urls import path
2
  from .views import recipe_generate_route
3
+ from .views import VisionLLMView, NIMVisionLLMView, TextLLMView, TranslateLLMView, SpeechLLMView, LlamaVisionView, IndicLLMView, TTSView, SpeechASRView, SpeechToSpeechView
4
 
5
  urlpatterns = [
6
  path('recipe_generate/', recipe_generate_route, name='recipe_generate'),
 
8
  path('nim_vision_llm_url/', NIMVisionLLMView.as_view()),
9
  path('text_llm_url/', TextLLMView.as_view()),
10
  path('translate_llm_url/', TranslateLLMView.as_view()),
11
+ path('speech_llm_url/', SpeechLLMView.as_view()),
12
+ path('speech_asr_url/', SpeechASRView.as_view()),
13
+ path('llama_vision_url/', LlamaVisionView.as_view()),
14
+ path('indic_llm_url/', IndicLLMView.as_view()),
15
+ path('tts_url/', TTSView.as_view()),
16
+ path('speech_to_speech_url/', SpeechToSpeechView.as_view()),
17
  ]
recipes/views.py CHANGED
@@ -9,6 +9,175 @@ import os
9
  import base64
10
  import json
11
  import requests
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  class TranslateLLMView(APIView):
14
  def post(self, request, format=None):
@@ -49,11 +218,10 @@ class TextLLMView(APIView):
49
  def post(self, request, format=None):
50
  try:
51
  data = request.data
52
- api_key = os.environ["MISTRAL_API_KEY"]
53
 
54
- # Initialize the Mistral client
55
- client = Mistral(api_key=api_key)
56
 
 
57
  prompt = data['messages'][0]['prompt']
58
  # Specify model
59
  #model = "pixtral-12b-2409"
@@ -71,13 +239,23 @@ class TextLLMView(APIView):
71
  }
72
  ]
73
 
74
- # Get the chat response
75
- chat_response = client.chat.complete(
76
- model=model,
77
- messages=messages
78
- )
 
 
 
 
 
 
 
 
 
 
 
79
 
80
- content = chat_response.choices[0].message.content
81
  #print(chat_response.choices[0].message.content)
82
  # Return the content of the response
83
  return Response({"response": content})
@@ -85,6 +263,40 @@ class TextLLMView(APIView):
85
  print(f"An error occurred: {e}")
86
  return Response({'error': 'Something went wrong'}, status=500)
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  @api_view(['GET'])
90
  def recipe_generate_route(request):
@@ -103,6 +315,44 @@ def recipe_generate_route(request):
103
  return Response(result)
104
 
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  class VisionLLMView(APIView):
108
  def post(self, request, format=None):
 
9
  import base64
10
  import json
11
  import requests
12
+ from openai import OpenAI
13
+ from ollama import Client
14
+ from django.http import FileResponse
15
+ import io
16
+
17
+ class TTSView(APIView):
18
+ def post(self, request, format=None):
19
+ # Define the API endpoint
20
+ # Define the URL for the TTS API
21
+ url = 'http://localhost:5002/api/tts'
22
+
23
+ # Define the multiline text
24
+ text = "This is the first line"
25
+
26
+ # Prepare the parameters for the GET request
27
+ params = {
28
+ 'text': text
29
+ }
30
+
31
+ # Make the GET request
32
+ response = requests.get(url, params=params)
33
+
34
+ # Check if the request was successful
35
+ if response.status_code == 200:
36
+ # Save the audio response as a WAV file
37
+ # Create a file-like object with the audio data
38
+ audio_data = io.BytesIO(response.content)
39
+
40
+ # Return the audio file as a response
41
+ return FileResponse(audio_data, as_attachment=True, filename='audio_output.wav')
42
+ else:
43
+ return Response({"error": "Failed to synthesize speech"}, status=response.status_code)
44
+
45
+ class SpeechASRView(APIView):
46
+ def post(self, request, format=None):
47
+ try:
48
+ data = request.data
49
+ ##prompt = data['prompt']
50
+ audio = data['audio']
51
+
52
+ client = OpenAI(api_key="cant-be-empty", base_url="http://localhost:11800/v1/")
53
+
54
+ #filename= '/home/gaganyatri/Music/test1.flac'
55
+ audio_bytes = audio.read()
56
+
57
+ #audio_file = open(filename, "rb")
58
+
59
+ transcript = client.audio.transcriptions.create(
60
+ model="Systran/faster-distil-whisper-small.en", file=audio_bytes
61
+ )
62
+
63
+ #print(transcript.text)
64
+ voice_content = transcript.text
65
+ return Response({"response": voice_content})
66
+ except Exception as e:
67
+ print(f"An error occurred: {e}")
68
+ return Response({'error': 'Something went wrong'}, status=500)
69
+
70
+
71
+ class SpeechToSpeechView(APIView):
72
+ def post(self, request, format=None):
73
+ try:
74
+ data = request.data
75
+ ##prompt = data['prompt']
76
+ audio = data['audio']
77
+
78
+ client = OpenAI(api_key="cant-be-empty", base_url="http://localhost:11800/v1/")
79
+
80
+ #filename= '/home/gaganyatri/Music/test1.flac'
81
+ audio_bytes = audio.read()
82
+
83
+ #audio_file = open(filename, "rb")
84
+
85
+ transcript = client.audio.transcriptions.create(
86
+ model="Systran/faster-distil-whisper-small.en", file=audio_bytes
87
+ )
88
+
89
+ #print(transcript.text)
90
+ voice_content = transcript.text
91
+ #content = 'audio received'
92
+ system_prompt = "Please summarize the following prompt into a concise and clear statement:"
93
+
94
+
95
+ model = "mistral-nemo:latest"
96
+ client = Client(host='http://localhost:11434')
97
+ response = client.chat(
98
+ model=model,
99
+ messages=[
100
+ {
101
+ "role": "system",
102
+ "content": system_prompt
103
+ },
104
+ {
105
+ "role": "user",
106
+ "content": voice_content,
107
+ }
108
+ ],
109
+ )
110
+
111
+ # Extract the model's response about the image
112
+ response_text = response['message']['content'].strip()
113
+
114
+ url = 'http://localhost:5002/api/tts'
115
+
116
+ # Define the multiline text
117
+ #text = "This is the first line"
118
+
119
+ # Prepare the parameters for the GET request
120
+ params = {
121
+ 'text': response_text
122
+ }
123
+
124
+ # Make the GET request
125
+ response = requests.get(url, params=params)
126
+
127
+ # Check if the request was successful
128
+ if response.status_code == 200:
129
+ # Save the audio response as a WAV file
130
+ # Create a file-like object with the audio data
131
+ audio_data = io.BytesIO(response.content)
132
+
133
+ # Return the audio file as a response
134
+ return FileResponse(audio_data, as_attachment=True, filename='audio_output.wav')
135
+ else:
136
+ return Response({"error": "Failed to synthesize speech"}, status=response.status_code)
137
+
138
+ except Exception as e:
139
+ print(f"An error occurred: {e}")
140
+ return Response({'error': 'Something went wrong'}, status=500)
141
+
142
+ class SpeechLLMView(APIView):
143
+ def post(self, request, format=None):
144
+ try:
145
+ data = request.data
146
+ ##prompt = data['prompt']
147
+ audio = data['audio']
148
+
149
+ client = OpenAI(api_key="cant-be-empty", base_url="http://localhost:11800/v1/")
150
+
151
+ #filename= '/home/gaganyatri/Music/test1.flac'
152
+ audio_bytes = audio.read()
153
+
154
+ #audio_file = open(filename, "rb")
155
+
156
+ transcript = client.audio.transcriptions.create(
157
+ model="Systran/faster-distil-whisper-small.en", file=audio_bytes
158
+ )
159
+
160
+ #print(transcript.text)
161
+ voice_content = transcript.text
162
+ #content = 'audio received'
163
+
164
+ model = "mistral-nemo:latest"
165
+ client = Client(host='http://localhost:11434')
166
+ response = client.chat(
167
+ model=model,
168
+ messages=[{
169
+ "role": "user",
170
+ "content": voice_content,
171
+ }],
172
+ )
173
+
174
+ # Extract the model's response about the image
175
+ response_text = response['message']['content'].strip()
176
+
177
+ return Response({"response": response_text})
178
+ except Exception as e:
179
+ print(f"An error occurred: {e}")
180
+ return Response({'error': 'Something went wrong'}, status=500)
181
 
182
  class TranslateLLMView(APIView):
183
  def post(self, request, format=None):
 
218
  def post(self, request, format=None):
219
  try:
220
  data = request.data
 
221
 
222
+ isOnline = data['isOnline']
 
223
 
224
+ print(isOnline)
225
  prompt = data['messages'][0]['prompt']
226
  # Specify model
227
  #model = "pixtral-12b-2409"
 
239
  }
240
  ]
241
 
242
+ if(isOnline):
243
+ api_key = os.environ["MISTRAL_API_KEY"]
244
+
245
+ # Initialize the Mistral client
246
+ client = Mistral(api_key=api_key)
247
+
248
+
249
+ # Get the chat response
250
+ chat_response = client.chat.complete(
251
+ model=model,
252
+ messages=messages
253
+ )
254
+
255
+ content = chat_response.choices[0].message.content
256
+ else:
257
+ content = "helloWorld"
258
 
 
259
  #print(chat_response.choices[0].message.content)
260
  # Return the content of the response
261
  return Response({"response": content})
 
263
  print(f"An error occurred: {e}")
264
  return Response({'error': 'Something went wrong'}, status=500)
265
 
266
+ class IndicLLMView(APIView):
267
+ def post(self, request, format=None):
268
+ try:
269
+ data = request.data
270
+
271
+ isOnline = data['isOnline']
272
+
273
+ print(isOnline)
274
+ prompt = data['messages'][0]['prompt']
275
+ # Specify model
276
+ #model = "pixtral-12b-2409"
277
+ model = data['model']
278
+ # Define the messages for the chat
279
+
280
+ client = Client(host='http://localhost:11434')
281
+ response = client.chat(
282
+ model=model,
283
+ messages=[{
284
+ "role": "user",
285
+ "content": prompt,
286
+ }],
287
+ )
288
+
289
+ # Extract the model's response about the image
290
+ response_text = response['message']['content'].strip()
291
+
292
+ #print(chat_response.choices[0].message.content)
293
+ # Return the content of the response
294
+ return Response({"response": response_text})
295
+ except Exception as e:
296
+ print(f"An error occurred: {e}")
297
+ return Response({'error': 'Something went wrong'}, status=500)
298
+
299
+
300
 
301
  @api_view(['GET'])
302
  def recipe_generate_route(request):
 
315
  return Response(result)
316
 
317
 
318
+ class LlamaVisionView(APIView):
319
+ def post(self, request, format=None):
320
+ try:
321
+ data = request.data
322
+
323
+ image_data = (data['messages'][0]['image'][0])
324
+ prompt = data['messages'][0]['prompt']
325
+ # Specify model
326
+ #model = "pixtral-12b-2409"
327
+ model = data['model']
328
+ # Define the messages for the chat
329
+
330
+ # Define the messages for the chat
331
+
332
+ client = Client(host='http://localhost:21434')
333
+ response = client.chat(
334
+ model="x/llama3.2-vision:latest",
335
+ messages=[{
336
+ "role": "user",
337
+ "content": prompt,
338
+ "images": [image_data]
339
+ }],
340
+ )
341
+
342
+ # Extract the model's response about the image
343
+ response_text = response['message']['content'].strip()
344
+
345
+ print(response_text)
346
+ content = response_text
347
+
348
+
349
+ #print(chat_response.choices[0].message.content)
350
+ # Return the content of the response
351
+ return Response({"response": content})
352
+ except Exception as e:
353
+ print(f"An error occurred: {e}")
354
+ return Response({'error': 'Something went wrong'}, status=500)
355
+
356
 
357
  class VisionLLMView(APIView):
358
  def post(self, request, format=None):
requirements.txt CHANGED
@@ -7,6 +7,7 @@ charset-normalizer==3.3.2
7
  click==8.1.7
8
  cryptography==43.0.1
9
  defusedxml==0.8.0rc2
 
10
  Django==5.1.1
11
  django-cors-headers==4.4.0
12
  django-filter==24.3
@@ -24,12 +25,15 @@ httpcore==1.0.5
24
  httpx==0.27.2
25
  idna==3.10
26
  inflection==0.5.1
 
27
  jsonpath-python==1.0.6
28
  Markdown==3.7
29
  mistralai==1.1.0
30
  mypy-extensions==1.0.0
31
  numpy==1.26.4
32
  oauthlib==3.2.2
 
 
33
  packaging==24.1
34
  pandas==2.2.3
35
  pycparser==2.22
@@ -47,6 +51,7 @@ sniffio==1.3.1
47
  social-auth-app-django==5.4.2
48
  social-auth-core==4.5.4
49
  sqlparse==0.5.1
 
50
  typing-inspect==0.9.0
51
  typing_extensions==4.12.2
52
  tzdata==2024.2
 
7
  click==8.1.7
8
  cryptography==43.0.1
9
  defusedxml==0.8.0rc2
10
+ distro==1.9.0
11
  Django==5.1.1
12
  django-cors-headers==4.4.0
13
  django-filter==24.3
 
25
  httpx==0.27.2
26
  idna==3.10
27
  inflection==0.5.1
28
+ jiter==0.6.1
29
  jsonpath-python==1.0.6
30
  Markdown==3.7
31
  mistralai==1.1.0
32
  mypy-extensions==1.0.0
33
  numpy==1.26.4
34
  oauthlib==3.2.2
35
+ ollama==0.3.3
36
+ openai==1.51.2
37
  packaging==24.1
38
  pandas==2.2.3
39
  pycparser==2.22
 
51
  social-auth-app-django==5.4.2
52
  social-auth-core==4.5.4
53
  sqlparse==0.5.1
54
+ tqdm==4.66.5
55
  typing-inspect==0.9.0
56
  typing_extensions==4.12.2
57
  tzdata==2024.2
spaces/urls.py CHANGED
@@ -26,7 +26,7 @@ schema_view = get_schema_view(
26
  default_version='v1',
27
  description="API for Space Operations",
28
  terms_of_service="https://www.gaganyatri.in/",
29
- contact=openapi.Contact(email="contact@yourapp.com"),
30
  license=openapi.License(name="MIT License"),
31
  ),
32
  public=True,
 
26
  default_version='v1',
27
  description="API for Space Operations",
28
  terms_of_service="https://www.gaganyatri.in/",
29
+ contact=openapi.Contact(email="info@slabstech.com"),
30
  license=openapi.License(name="MIT License"),
31
  ),
32
  public=True,