Spaces:

martinpalinkov
/

content

Sleeping

App Files Files Community

martinpalinkov committed on Dec 6, 2024

Commit

cf3721c

verified ·

1 Parent(s): b65c131

Update app.py

Browse files

Files changed (1) hide show

app.py +133 -135

app.py CHANGED Viewed

@@ -1,135 +1,133 @@
-!pip install gradio transformers torch gtts
-import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer, MarianMTModel, MarianTokenizer, BlipProcessor, BlipForConditionalGeneration
-from gtts import gTTS
-import torch
-import logging
-import traceback
-from PIL import Image
-logging.basicConfig(filename="error_log.txt", level=logging.ERROR, format="%(asctime)s - %(message)s")
-chatbot_model_name = "microsoft/DialoGPT-medium"
-tokenizer = AutoTokenizer.from_pretrained(chatbot_model_name)
-chatbot_model = AutoModelForCausalLM.from_pretrained(chatbot_model_name)
-blip_model_name = "Salesforce/blip-image-captioning-base"
-blip_processor = BlipProcessor.from_pretrained(blip_model_name)
-blip_model = BlipForConditionalGeneration.from_pretrained(blip_model_name)
-def get_translation_model(src_lang, tgt_lang):
-    model_name = f'Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}'
-    model = MarianMTModel.from_pretrained(model_name)
-    tokenizer = MarianTokenizer.from_pretrained(model_name)
-    return model, tokenizer
-chat_history_ids = None
-MAX_LENGTH = 1024
-MAX_HISTORY_LENGTH = 5
-def generate_image_caption(image_path):
-    try:
-        image = Image.open(image_path)
-        image.show()
-        image = blip_processor(images=image, return_tensors="pt").pixel_values
-        with torch.no_grad():
-            caption = blip_model.generate(image, max_length=50, num_beams=5)
-        return blip_processor.decode(caption[0], skip_special_tokens=True)
-    except Exception as e:
-        logging.error(f"Error in BLIP image captioning: {str(e)}\n{traceback.format_exc()}")
-        return "Error processing image."
-def chatbot_with_image(message, language, image_path=None, reset=False):
-    global chat_history_ids
-    if reset:
-        chat_history_ids = None
-        return "Chat history reset.", None
-    if not message.strip() and not image_path:
-        return "Please enter a message or upload an image.", None
-    bot_response = ""
-    try:
-        if message.strip():
-            new_user_input_ids = tokenizer.encode(message + tokenizer.eos_token, return_tensors="pt")
-            if chat_history_ids is not None:
-                chat_history_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1)
-            else:
-                chat_history_ids = new_user_input_ids
-            if chat_history_ids.shape[-1] > MAX_HISTORY_LENGTH * MAX_LENGTH:
-                chat_history_ids = chat_history_ids[:, -MAX_HISTORY_LENGTH * MAX_LENGTH:]
-            bot_input_ids = chat_history_ids
-            chat_history_ids = chatbot_model.generate(bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id)
-            bot_response = tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
-    except Exception as e:
-        bot_response = f"Error processing message: {str(e)}"
-        logging.error(f"Error in chatbot response generation: {str(e)}\n{traceback.format_exc()}")
-    if image_path:
-        try:
-            image_caption = generate_image_caption(image_path)
-            bot_response += f"The image shows: {image_caption}."
-        except Exception as e:
-            bot_response += f" Error processing image: {str(e)}"
-            logging.error(f"Error in image processing: {str(e)}\n{traceback.format_exc()}")
-    try:
-        if language != "en":
-            translation_model, translation_tokenizer = get_translation_model("en", language)
-            translated = translation_model.generate(**translation_tokenizer(bot_response, return_tensors="pt", padding=True, truncation=True))
-            bot_response = translation_tokenizer.decode(translated[0], skip_special_tokens=True)
-    except Exception as e:
-        bot_response += f" Error in translation: {str(e)}"
-        logging.error(f"Error in translation: {str(e)}\n{traceback.format_exc()}")
-    try:
-        tts = gTTS(bot_response, lang=language)
-        audio_path = "response.mp3"
-        tts.save(audio_path)
-    except Exception as e:
-        bot_response += f" Error generating TTS: {str(e)}"
-        logging.error(f"Error in TTS generation: {str(e)}\n{traceback.format_exc()}")
-        audio_path = None
-    return bot_response, audio_path
-with gr.Blocks() as demo:
-    with gr.Row():
-        gr.Markdown("### Chatbot with Image Understanding and Language Support")
-    with gr.Row():
-        output_audio = gr.Audio(label="Generated Speech", type="filepath")
-        output_text = gr.Textbox(label="Bot Response")
-    language_dropdown = gr.Dropdown(
-        choices=["en", "es", "fr", "de", "it", "zh", "pl"],
-        label="Select Language",
-        value="en"
-    )
-    image_input = gr.Image(label="Upload Image", type="filepath")
-    text_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
-    with gr.Row():
-        send_button = gr.Button("Send")
-        reset_button = gr.Button("Reset Chat")
-    send_button.click(
-        chatbot_with_image,
-        inputs=[text_input, language_dropdown, image_input, gr.State(False)],
-        outputs=[output_text, output_audio]
-    )
-    reset_button.click(
-        fn=lambda reset: ("Chat history reset.", None) if reset else ("", None),
-        inputs=[gr.State(True)],
-        outputs=[output_text, output_audio]
-    )
-if __name__ == "__main__":
-    demo.launch(share=True)

+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, MarianMTModel, MarianTokenizer, BlipProcessor, BlipForConditionalGeneration
+from gtts import gTTS
+import torch
+import logging
+import traceback
+from PIL import Image
+# Only ERROR-level records are written, timestamped, to error_log.txt.
+logging.basicConfig(
+    filename="error_log.txt",
+    level=logging.ERROR,
+    format="%(asctime)s - %(message)s",
+)
+# Conversational model and its tokenizer, loaded once at import time.
+chatbot_model_name = "microsoft/DialoGPT-medium"
+tokenizer = AutoTokenizer.from_pretrained(chatbot_model_name)
+chatbot_model = AutoModelForCausalLM.from_pretrained(chatbot_model_name)
+# Image-captioning model and its processor, loaded once at import time.
+blip_model_name = "Salesforce/blip-image-captioning-base"
+blip_processor = BlipProcessor.from_pretrained(blip_model_name)
+blip_model = BlipForConditionalGeneration.from_pretrained(blip_model_name)
+# (src_lang, tgt_lang) -> (model, tokenizer); filled lazily so each pair is loaded once.
+_translation_cache = {}
+def get_translation_model(src_lang, tgt_lang):
+    """Return the Marian translation model and tokenizer for a language pair.
+
+    The pair is resolved to the ``Helsinki-NLP/opus-mt-{src}-{tgt}`` checkpoint.
+    Loaded pairs are kept in a module-level cache so repeated requests for the
+    same pair do not reload the checkpoint on every call.
+
+    Args:
+        src_lang: Source language code used in the checkpoint name.
+        tgt_lang: Target language code used in the checkpoint name.
+
+    Returns:
+        A ``(model, tokenizer)`` tuple.
+
+    Raises:
+        Whatever ``from_pretrained`` raises when the checkpoint cannot be
+        loaded; a failed load is not cached, so a later call retries.
+    """
+    pair = (src_lang, tgt_lang)
+    if pair not in _translation_cache:
+        model_name = f'Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}'
+        model = MarianMTModel.from_pretrained(model_name)
+        # Named distinctly so it does not shadow the module-level chatbot tokenizer.
+        translation_tokenizer = MarianTokenizer.from_pretrained(model_name)
+        _translation_cache[pair] = (model, translation_tokenizer)
+    return _translation_cache[pair]
+# Token ids of the running conversation, shared module-wide; None until the
+# first message is processed and again after a reset.
+chat_history_ids = None
+# The product MAX_HISTORY_LENGTH * MAX_LENGTH is used by chatbot_with_image as
+# an upper bound on how many trailing history tokens are kept.
+MAX_LENGTH = 1024
+MAX_HISTORY_LENGTH = 5
+def generate_image_caption(image_path):
+    """Return a BLIP-generated caption for the image stored at *image_path*.
+
+    Args:
+        image_path: Filesystem path of the image to caption.
+
+    Returns:
+        The decoded caption string, or ``"Error processing image."`` when
+        anything fails (the exception and traceback are written to the log).
+    """
+    try:
+        # The context manager closes the underlying file; convert("RGB") reads
+        # the pixel data first and normalises palette/alpha/greyscale inputs.
+        # No image.show(): it would try to open a desktop viewer on the server.
+        with Image.open(image_path) as source_image:
+            rgb_image = source_image.convert("RGB")
+        pixel_values = blip_processor(images=rgb_image, return_tensors="pt").pixel_values
+        with torch.no_grad():
+            caption_ids = blip_model.generate(pixel_values, max_length=50, num_beams=5)
+        return blip_processor.decode(caption_ids[0], skip_special_tokens=True)
+    except Exception as e:
+        logging.error(f"Error in BLIP image captioning: {str(e)}\n{traceback.format_exc()}")
+        return "Error processing image."
+def chatbot_with_image(message, language, image_path=None, reset=False):
+    """Produce a chatbot reply (text plus optional speech) for a message and/or image.
+
+    Args:
+        message: User text; ``None`` or whitespace-only counts as "no message".
+        language: Target language code. Anything other than ``"en"`` triggers
+            translation from English; the code is also passed to gTTS as-is.
+        image_path: Optional path of an image whose caption is appended to the reply.
+        reset: When true, clear the shared conversation history and return early.
+
+    Returns:
+        A ``(response_text, audio_path)`` tuple; ``audio_path`` is ``None`` when
+        no speech file was produced.
+    """
+    global chat_history_ids
+    if reset:
+        chat_history_ids = None
+        return "Chat history reset.", None
+    # Guard against None so the emptiness checks below cannot raise AttributeError.
+    message = message or ""
+    has_text = bool(message.strip())
+    if not has_text and not image_path:
+        return "Please enter a message or upload an image.", None
+    bot_response = ""
+    if has_text:
+        try:
+            new_user_input_ids = tokenizer.encode(message + tokenizer.eos_token, return_tensors="pt")
+            if chat_history_ids is not None:
+                bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1)
+            else:
+                bot_input_ids = new_user_input_ids
+            # generate()'s max_length counts prompt + reply, so the prompt has to
+            # stay well below it or no room is left for an answer.
+            generation_limit = 1000
+            reply_budget = 200
+            prompt_cap = min(MAX_HISTORY_LENGTH * MAX_LENGTH, generation_limit - reply_budget)
+            if bot_input_ids.shape[-1] > prompt_cap:
+                # Keep only the most recent tokens of the conversation.
+                bot_input_ids = bot_input_ids[:, -prompt_cap:]
+            generated_ids = chatbot_model.generate(bot_input_ids, max_length=generation_limit, pad_token_id=tokenizer.eos_token_id)
+            bot_response = tokenizer.decode(generated_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True)
+            # Commit the new history only after generation succeeded, so a failed
+            # turn does not leave a half-updated conversation behind.
+            chat_history_ids = generated_ids
+        except Exception as e:
+            bot_response = f"Error processing message: {str(e)}"
+            logging.error(f"Error in chatbot response generation: {str(e)}\n{traceback.format_exc()}")
+    if image_path:
+        try:
+            image_caption = generate_image_caption(image_path)
+            # Separate the caption sentence from any preceding chat reply.
+            if bot_response:
+                bot_response += " "
+            bot_response += f"The image shows: {image_caption}."
+        except Exception as e:
+            bot_response += f" Error processing image: {str(e)}"
+            logging.error(f"Error in image processing: {str(e)}\n{traceback.format_exc()}")
+    try:
+        # Nothing to translate when the model produced an empty reply.
+        if language != "en" and bot_response.strip():
+            translation_model, translation_tokenizer = get_translation_model("en", language)
+            translated = translation_model.generate(**translation_tokenizer(bot_response, return_tensors="pt", padding=True, truncation=True))
+            bot_response = translation_tokenizer.decode(translated[0], skip_special_tokens=True)
+    except Exception as e:
+        bot_response += f" Error in translation: {str(e)}"
+        logging.error(f"Error in translation: {str(e)}\n{traceback.format_exc()}")
+    audio_path = None
+    # gTTS rejects empty text, so only synthesise speech when there is something to say.
+    if bot_response.strip():
+        try:
+            tts = gTTS(bot_response, lang=language)
+            audio_path = "response.mp3"
+            tts.save(audio_path)
+        except Exception as e:
+            bot_response += f" Error generating TTS: {str(e)}"
+            logging.error(f"Error in TTS generation: {str(e)}\n{traceback.format_exc()}")
+            audio_path = None
+    return bot_response, audio_path
+# Gradio UI: response widgets on top, then language/image/text inputs and the
+# Send / Reset Chat buttons.
+with gr.Blocks() as demo:
+    with gr.Row():
+        gr.Markdown("### Chatbot with Image Understanding and Language Support")
+    with gr.Row():
+        output_audio = gr.Audio(label="Generated Speech", type="filepath")
+        output_text = gr.Textbox(label="Bot Response")
+    language_dropdown = gr.Dropdown(
+        choices=["en", "es", "fr", "de", "it", "zh", "pl"],
+        label="Select Language",
+        value="en"
+    )
+    image_input = gr.Image(label="Upload Image", type="filepath")
+    text_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
+    with gr.Row():
+        send_button = gr.Button("Send")
+        reset_button = gr.Button("Reset Chat")
+    # Send: normal turn, the constant State(False) fills the `reset` parameter.
+    send_button.click(
+        chatbot_with_image,
+        inputs=[text_input, language_dropdown, image_input, gr.State(False)],
+        outputs=[output_text, output_audio]
+    )
+    # Reset: route through chatbot_with_image(reset=True) so the shared
+    # conversation history is actually cleared, not just the message shown.
+    reset_button.click(
+        fn=lambda reset: chatbot_with_image("", "en", reset=True) if reset else ("", None),
+        inputs=[gr.State(True)],
+        outputs=[output_text, output_audio]
+    )
+if __name__ == "__main__":
+    demo.launch(share=True)