!pip install gradio transformers torch gtts

import gradio as gr
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    MarianMTModel,
    MarianTokenizer,
    BlipProcessor,
    BlipForConditionalGeneration,
)
from gtts import gTTS
import torch
import logging
import traceback
from PIL import Image

# Log errors to a file so failures in the UI can be diagnosed later.
logging.basicConfig(filename="error_log.txt", level=logging.ERROR, format="%(asctime)s - %(message)s")

# Conversational model for chat replies.
chatbot_model_name = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(chatbot_model_name)
chatbot_model = AutoModelForCausalLM.from_pretrained(chatbot_model_name)

# BLIP model for image captioning.
blip_model_name = "Salesforce/blip-image-captioning-base"
blip_processor = BlipProcessor.from_pretrained(blip_model_name)
blip_model = BlipForConditionalGeneration.from_pretrained(blip_model_name)


def get_translation_model(src_lang, tgt_lang):
    """Load a MarianMT model for the given language pair.

    Note: not every pair is published under Helsinki-NLP/opus-mt-*;
    a missing pair raises here and is caught (and logged) by the caller.
    """
    model_name = f"Helsinki-NLP/opus-mt-{src_lang}-{tgt_lang}"
    model = MarianMTModel.from_pretrained(model_name)
    tokenizer = MarianTokenizer.from_pretrained(model_name)
    return model, tokenizer


chat_history_ids = None
MAX_LENGTH = 1024     # DialoGPT's context window, in tokens
MAX_NEW_TOKENS = 128  # per-turn budget for the generated reply


def generate_image_caption(image_path):
    try:
        # Convert to RGB so RGBA or grayscale uploads don't break the processor.
        image = Image.open(image_path).convert("RGB")
        pixel_values = blip_processor(images=image, return_tensors="pt").pixel_values
        with torch.no_grad():
            caption_ids = blip_model.generate(pixel_values, max_length=50, num_beams=5)
        return blip_processor.decode(caption_ids[0], skip_special_tokens=True)
    except Exception as e:
        logging.error(f"Error in BLIP image captioning: {str(e)}\n{traceback.format_exc()}")
        return "Error processing image."


def chatbot_with_image(message, language, image_path=None, reset=False):
    global chat_history_ids
    if reset:
        chat_history_ids = None
        return "Chat history reset.", None
    if not message.strip() and not image_path:
        return "Please enter a message or upload an image.", None

    bot_response = ""
    try:
        if message.strip():
            new_user_input_ids = tokenizer.encode(message + tokenizer.eos_token, return_tensors="pt")
            if chat_history_ids is not None:
                chat_history_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1)
            else:
                chat_history_ids = new_user_input_ids
            # Trim the rolling history so prompt plus reply fit the context window.
            max_history = MAX_LENGTH - MAX_NEW_TOKENS
            if chat_history_ids.shape[-1] > max_history:
                chat_history_ids = chat_history_ids[:, -max_history:]
            bot_input_ids = chat_history_ids
            chat_history_ids = chatbot_model.generate(
                bot_input_ids, max_new_tokens=MAX_NEW_TOKENS, pad_token_id=tokenizer.eos_token_id
            )
            # Decode only the newly generated tokens, not the echoed prompt.
            bot_response = tokenizer.decode(
                chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True
            )
    except Exception as e:
        bot_response = f"Error processing message: {str(e)}"
        logging.error(f"Error in chatbot response generation: {str(e)}\n{traceback.format_exc()}")

    if image_path:
        try:
            image_caption = generate_image_caption(image_path)
            bot_response += f" The image shows: {image_caption}."
        except Exception as e:
            bot_response += f" Error processing image: {str(e)}"
            logging.error(f"Error in image processing: {str(e)}\n{traceback.format_exc()}")

    # Translate the English response when another UI language is selected.
    try:
        if language != "en":
            translation_model, translation_tokenizer = get_translation_model("en", language)
            inputs = translation_tokenizer(bot_response, return_tensors="pt", padding=True, truncation=True)
            translated = translation_model.generate(**inputs)
            bot_response = translation_tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        bot_response += f" Error in translation: {str(e)}"
        logging.error(f"Error in translation: {str(e)}\n{traceback.format_exc()}")

    # Synthesize speech for the (possibly translated) response.
    try:
        tts = gTTS(bot_response, lang=language)
        audio_path = "response.mp3"
        tts.save(audio_path)
    except Exception as e:
        bot_response += f" Error generating TTS: {str(e)}"
        logging.error(f"Error in TTS generation: {str(e)}\n{traceback.format_exc()}")
        audio_path = None

    return bot_response, audio_path


with gr.Blocks() as demo:
    with gr.Row():
        gr.Markdown("### Chatbot with Image Understanding and Language Support")
    with gr.Row():
        output_audio = gr.Audio(label="Generated Speech", type="filepath")
        output_text = gr.Textbox(label="Bot Response")
    # These codes are shared by MarianMT and gTTS; unsupported combinations
    # surface as logged errors rather than crashes.
    language_dropdown = gr.Dropdown(
        choices=["en", "es", "fr", "de", "it", "zh", "pl"],
        label="Select Language",
        value="en",
    )
    image_input = gr.Image(label="Upload Image", type="filepath")
    text_input = gr.Textbox(label="Your Message", placeholder="Type your message here...")
    with gr.Row():
        send_button = gr.Button("Send")
        reset_button = gr.Button("Reset Chat")

    send_button.click(
        chatbot_with_image,
        inputs=[text_input, language_dropdown, image_input, gr.State(False)],
        outputs=[output_text, output_audio],
    )
    # Route the reset through chatbot_with_image so the global history is
    # actually cleared, not just the displayed text.
    reset_button.click(
        fn=lambda: chatbot_with_image("", "en", None, reset=True),
        inputs=[],
        outputs=[output_text, output_audio],
    )

if __name__ == "__main__":
    demo.launch(share=True)
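
# Optional smoke test (a minimal sketch, not part of the app): call the handler
# directly to verify the pipeline before wiring it into the UI. It assumes the
# models above have finished downloading and that gTTS has network access.
#
#     text, audio = chatbot_with_image("Hello, how are you?", "en")
#     print(text)   # DialoGPT reply
#     print(audio)  # "response.mp3" on success, None if TTS failed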