Rediones-AI / utils /caption_utils.py
Testys's picture
Made changes to the files that need it for testing on huggingface, Dockerfile included
0d42798
raw
history blame
4.99 kB
from transformers import BlipProcessor, BlipForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import requests
from dotenv import load_dotenv
from image_utils import UrlTest
import os
# Module-level UrlTest instance; ImageCaptioning.combo_model() calls
# img.load_image() on its input before captioning.
img = UrlTest()
class ImageCaptioning:
    """Caption an image with BLIP and derive short topic titles with FLAN-T5.

    Both models are loaded in ``__init__`` and switched to evaluation mode.
    """

    # Pretrained checkpoints loaded by __init__.
    _BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
    _TOPIC_CHECKPOINT = "google/flan-t5-large"

    # Beam width used for topic generation unless more topics are requested.
    _MIN_BEAMS = 5

    # Sample titles appended to the topic prompt as few-shot inspiration.
    _EXAMPLE_TITLES = (
        "Misty Peaks at Dawn",
        "Graffiti Lanes of Urbania",
        "Chef’s Secret Ingredients",
        "Neon Future Skylines",
        "Puppy’s First Snow",
        "Edge of Adventure",
    )

    def __init__(self):
        """Load the BLIP captioner and the FLAN-T5 topic generator."""
        self.blip_processor = BlipProcessor.from_pretrained(self._BLIP_CHECKPOINT)
        self.blip_model = BlipForConditionalGeneration.from_pretrained(self._BLIP_CHECKPOINT)
        self.topic_generator_processor = AutoTokenizer.from_pretrained(self._TOPIC_CHECKPOINT)
        self.topic_generator_model = AutoModelForSeq2SeqLM.from_pretrained(self._TOPIC_CHECKPOINT)
        # Inference only: put both models in evaluation mode.
        self.blip_model.eval()
        self.topic_generator_model.eval()

    def generate_caption(self, image):
        """Run BLIP on ``image`` and return the generated token-id sequences.

        The return value is the raw output of ``blip_model.generate`` (not
        text); callers decode it with ``self.blip_processor.decode``, as
        ``combo_model`` does.

        Args:
            image: Image in a form accepted by the BLIP processor.

        Returns:
            The generated sequences produced by sampling (temperature 0.5,
            top-k 50, top-p 0.95, at most 128 new tokens).
        """
        pixel_inputs = self.blip_processor(image, return_tensors="pt")
        return self.blip_model.generate(
            pixel_values=pixel_inputs["pixel_values"],
            max_new_tokens=128,
            do_sample=True,
            temperature=0.5,
            top_k=50,
            top_p=0.95,
        )

    def _build_topic_prompt(self, caption, additional_text, num_topics):
        """Assemble the instruction prompt sent to the topic generator."""
        sections = [
            "Generate short, creative titles or topics based on the detailed information provided:",
            f"Image description: {caption}",
        ]
        if additional_text:
            sections.append(f"Additional context: {additional_text}")
            focus = "blend the essence of the image with the additional context"
        else:
            focus = "encapsulate the essence of the image"

        # Each sentence carries its own trailing space so that adjacent
        # fragments do not run together (e.g. "discussions.IMPORTANT:").
        sections.append(
            f"Task: Create {num_topics} inventive titles or topics (2-5 words each) that {focus}. "
            "These titles should be imaginative and suitable for use as hashtags, image titles, or starting points for discussions. "
            "IMPORTANT: Be imaginative and concise in your responses. Avoid repeating the same ideas in different words. "
            "Also make sure to provide a title/topic that relates to every context provided while following the examples listed below as a way of being creative and intuitive."
        )

        example_lines = "\n".join(f'- "{title}"' for title in self._EXAMPLE_TITLES)
        return (
            "\n\n".join(sections)
            + "\n\nCreative examples to inspire your titles/topics:\n"
            + example_lines
            + f"\n\nNow, inspired by these examples, create {num_topics} short and descriptive titles/topics based on the information provided.\n"
        )

    def generate_topics(self, caption, additional_text=None, num_topics=3):
        """Generate up to ``num_topics`` short titles/topics for a caption.

        Args:
            caption: Text description of the image.
            additional_text: Optional extra context blended into the prompt.
            num_topics: Number of sequences requested from the model.

        Returns:
            A list of decoded, stripped topics. Empty and single-word results
            are dropped, so the list may be shorter than ``num_topics``.
        """
        full_prompt = self._build_topic_prompt(caption, additional_text, num_topics)
        print(full_prompt)

        inputs = self.topic_generator_processor(
            full_prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=512,
        )
        outputs = self.topic_generator_model.generate(
            **inputs,
            num_return_sequences=num_topics,
            do_sample=True,
            temperature=0.7,
            max_length=32,  # Kept small so the outputs stay title-sized
            top_k=50,
            top_p=0.95,
            # Beam search cannot return more sequences than it has beams, so
            # widen the beam when more than the default number is requested.
            num_beams=max(self._MIN_BEAMS, num_topics),
            no_repeat_ngram_size=2,
        )

        decoded = (
            self.topic_generator_processor.decode(sequence, skip_special_tokens=True).strip()
            for sequence in outputs
        )
        # Keep multi-word results only; this also discards empty strings.
        return [topic for topic in decoded if len(topic.split()) > 1]

    def combo_model(self, image, additional_text=None):
        """Caption an image and generate topics for it in one call.

        Args:
            image: Passed to ``img.load_image``; presumably a URL or path,
                to be confirmed against ``image_utils.UrlTest``.
            additional_text: Optional extra context for topic generation.

        Returns:
            A dict with the decoded ``"caption"`` string and the ``"topics"``
            list returned by ``generate_topics``.
        """
        image = img.load_image(image)
        caption_ids = self.generate_caption(image)
        # Only the first generated sequence is used as the caption.
        caption = self.blip_processor.decode(caption_ids[0], skip_special_tokens=True)
        topics = self.generate_topics(caption, additional_text)
        return {
            "caption": caption,
            "topics": topics
        }