# Scraped page header (Spaces listing; file size: 19,408 bytes; revision fe4e4c5).
# The page's line-number gutter (1-381) was extraction residue and has been removed.
#Imports
import gradio as gr
from PIL import Image, UnidentifiedImageError
from gtts import gTTS
import requests
import re
import torch
from transformers import CLIPProcessor, CLIPModel, pipeline
from sentence_transformers import SentenceTransformer, util
from langdetect import detect
from io import BytesIO
import pandas as pd
import numpy as np
import soundfile as sf
import os
import subprocess
# Run the setup script (setup.sh) before any model is loaded; per the original
# author's note it installs espeak-ng. check=True makes startup fail with
# CalledProcessError if the script exits with a non-zero status.
subprocess.run(['bash', 'setup.sh'], check=True)
# DataFrame with information about the paintings: image URL, title, description, and story
data = {
"image_url": [
"https://s.turbifycdn.com/aah/gallerydirectart/vincent-van-gogh-estate-signed-limited-edition-giclee-starry-night-47.png", # Starry Night
"https://cdn.mos.cms.futurecdn.net/xRqbwS4odpkSQscn3jHECh-1200-80.jpg", # Mona Lisa
"https://upload.wikimedia.org/wikipedia/en/d/dd/The_Persistence_of_Memory.jpg", # The Persistence of Memory
"https://static.wixstatic.com/media/1071a8_cf1930f883e043e28d03d5a26a5960ef~mv2.jpg/v1/fill/w_568,h_718,al_c,q_85,usm_0.66_1.00_0.01,enc_auto/1071a8_cf1930f883e043e28d03d5a26a5960ef~mv2.jpg", # The Scream
"https://images.artbrokerage.com/artthumb/magritte_158194_1/625x559/Rene_Magritte_Le_Fils_De_lhomme_the_Son_of_Man_1973.jpg", # The Son of Man
"https://www.artic.edu/iiif/2/25c31d8d-21a4-9ea1-1d73-6a2eca4dda7e/full/843,/0/default.jpg", # The Bedroom
"https://images.desenio.com/zoom/17047_1.jpg", # Girl with a Pearl Earring
"https://www.hastingsindependentpress.co.uk/wp-content/uploads/2021/03/Whistlers-Mother.jpg", # Whistler’s Mother
"https://live.staticflickr.com/7173/6713746433_652c3d9d4e_c.jpg" # The Basket of Apples
],
"Title": [
"Starry Night", "Mona Lisa", "The Persistence of Memory", "The Scream",
"The Son of Man", "The Bedroom",
"Girl with a Pearl Earring", "Whistler’s Mother", "The Basket of Apples"
],
"Description": [
# Starry Night
("Starry Night by Vincent van Gogh, painted in 1889, is one of the most famous works of art in the world. "
"It depicts a swirling night sky filled with stars over a small town. The painting uses vibrant colors like blue and yellow, "
"with exaggerated swirling patterns that create a dreamlike, almost chaotic feeling."),
# Mona Lisa
("The Mona Lisa by Leonardo da Vinci, painted between 1503 and 1506, is a portrait of a woman with a subtle, enigmatic smile. "
"The use of muted colors, including soft browns, greens, and black, emphasizes the serene and mysterious nature of the subject. "
"It is one of the most studied and recognized works of art in history."),
# The Persistence of Memory
("The Persistence of Memory, created by Salvador Dalí in 1931, features melting clocks draped over a surreal landscape. "
"The painting, primarily in soft shades of brown, blue, and yellow, explores themes of time and memory. The abstract shapes "
"and dreamlike atmosphere make it one of Dalí’s most famous surrealist works."),
# The Scream
("The Scream by Edvard Munch, painted in 1893, is one of the most iconic images in modern art. "
"It depicts a figure standing on a bridge, clutching their face in agony, as a blood-red sky swirls behind them. "
"The painting uses bold reds, oranges, and blues to evoke a sense of horror and existential despair."),
# The Son of Man
("The Son of Man by René Magritte, painted in 1964, is a surrealist self-portrait of the artist. "
"It depicts a man in a bowler hat and suit, with his face obscured by a floating green apple. "
"The background features a cloudy sky and a low wall, contributing to the dreamlike atmosphere. The painting is rich in symbolism, "
"exploring themes of identity, concealment, and perception."),
# The Bedroom
("The Bedroom by Vincent van Gogh, painted in 1888, depicts the artist’s simple bedroom in Arles, France. "
"The painting uses bold, contrasting colors—yellow, red, and blue—to create a vibrant, almost childlike view of the space. "
"Van Gogh painted this scene three times, each version representing his sense of comfort and sanctuary in his personal space."),
# Girl with a Pearl Earring
("Girl with a Pearl Earring by Johannes Vermeer, painted in 1665, is often referred to as the 'Mona Lisa of the North.' "
"The painting shows a young girl looking over her shoulder, wearing a large pearl earring. The use of light and shadow, "
"combined with soft colors like blue and yellow, creates a lifelike, intimate portrait."),
# Whistler’s Mother
("Whistler’s Mother by James McNeill Whistler, painted in 1871, is a portrait of the artist’s mother seated in profile. "
"The painting uses muted tones of black, gray, and brown, reflecting the simplicity and dignity of the subject. "
"It has become an icon of motherhood and restraint."),
# The Basket of Apples
("The Basket of Apples by Paul Cézanne, painted around 1895, is a still life that challenges traditional perspectives. "
"The painting shows a table with a basket of apples, a bottle, and bread. The use of soft colors, including browns, reds, and greens, "
"along with the tilted angles, makes the objects seem to float, blurring the line between realism and abstraction.")
],
"Story": [
# Starry Night
("Vincent van Gogh painted 'Starry Night' while in a mental asylum in Saint-Rémy-de-Provence, France. "
"It was created from memory and imagination, rather than a direct view from his window. The swirling patterns "
"are thought to represent his emotional turbulence at the time. The painting is celebrated for its bold brushstrokes "
"and imaginative use of color, representing the tension between beauty and chaos in the natural world."),
# Mona Lisa
("'Mona Lisa' was painted by Leonardo da Vinci during the Renaissance period. The subject of the painting, "
"believed to be Lisa Gherardini, is famed for her mysterious smile. The painting's sfumato technique, blending "
"soft transitions between light and shadow, creates a lifelike, three-dimensional appearance. The Mona Lisa has inspired "
"countless studies and interpretations over the centuries, and its theft in 1911 only increased its mystique."),
# The Persistence of Memory
("Salvador Dalí's 'The Persistence of Memory' is a surrealist masterpiece that reflects the fluidity of time and memory. "
"The melting clocks draped over the landscape suggest the passage of time becoming meaningless. The inspiration for the painting "
"came from a melting camembert cheese. Dalí’s fascination with dream states and Freud's theories of the unconscious mind "
"are evident in this strange, dreamlike scene."),
# The Scream
("'The Scream' by Edvard Munch is a vivid expression of anxiety and existential dread. Munch was inspired to create the work "
"after a walk during which he felt the 'great scream' of nature overwhelm him. The distorted figure and fiery red sky reflect "
"Munch’s inner turmoil. The painting has become an iconic representation of human anxiety and has been widely referenced in pop culture."),
# The Son of Man
("René Magritte’s 'The Son of Man' is a quintessential example of surrealism, blending reality and fantasy. "
"The painting is a self-portrait with Magritte’s face hidden by a hovering green apple, symbolizing the tension between what is visible "
"and what is hidden. The painting has been widely interpreted as a statement on identity and the nature of perception."),
# The Bedroom
("'The Bedroom' by Vincent van Gogh is a reflection of the artist’s longing for stability and tranquility. "
"The painting was created during one of the few peaceful periods in van Gogh’s turbulent life, and the vibrant colors convey "
"his emotions at the time. The bold, contrasting colors and exaggerated perspective make the simple room appear almost alive."),
# Girl with a Pearl Earring
("'Girl with a Pearl Earring' by Johannes Vermeer is one of the most enigmatic portraits in Western art. Known for its simplicity and elegance, "
"the painting captures a fleeting moment of connection between the viewer and the subject. The girl’s mysterious gaze and the radiant light "
"on her face have captivated audiences for centuries."),
# Whistler’s Mother
("James McNeill Whistler’s 'Arrangement in Grey and Black No. 1,' more commonly known as 'Whistler’s Mother,' is a stark, dignified portrait "
"of the artist’s mother. The painting is renowned for its minimalist composition and restrained use of color. Its iconic status grew after "
"its display at the Musée d'Orsay in Paris, becoming a symbol of maternal devotion and calm."),
# The Basket of Apples
("Paul Cézanne’s 'The Basket of Apples' is a revolutionary work that defies the traditional rules of perspective. By tilting objects at different angles, "
"Cézanne challenges the viewer’s perception of space and reality. This still life is often cited as a precursor to Cubism, and its soft color palette "
"creates a serene yet dynamic composition.")
]
}
df = pd.DataFrame(data)
# ---------------------------------------------------------------------------
# Model loading (executed once, when the module is imported)
# ---------------------------------------------------------------------------
# Prefer the GPU when CUDA is available; otherwise everything runs on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The translation pipelines below are given an integer device index:
# 0 selects the first GPU, -1 selects the CPU.
_pipeline_device = 0 if device == "cuda" else -1

# Checkpoint shared by the CLIP model and its preprocessor.
_clip_checkpoint = "openai/clip-vit-base-patch32"

# Text-to-speech pipeline used for the English narration.
narrator = pipeline(
    "text-to-speech",
    model="kakao-enterprise/vits-ljs",
    device=device,
)

# CLIP model (moved to the selected device) and its processor, used to embed
# images for the image search.
model = CLIPModel.from_pretrained(_clip_checkpoint).to(device)
processor = CLIPProcessor.from_pretrained(_clip_checkpoint)

# Sentence-embedding model used for the description search.
semantic_model = SentenceTransformer(
    'sentence-transformers/all-MiniLM-L6-v2',
    device=device,
)

# Translation pipelines: Arabic -> English for incoming descriptions and
# English -> Arabic for the stories shown to the user.
translator_ar_to_en = pipeline(
    "translation_ar_to_en",
    model="Helsinki-NLP/opus-mt-ar-en",
    device=_pipeline_device,
)
translator_en_to_ar = pipeline(
    "translation_en_to_arabic",
    model="Helsinki-NLP/opus-mt-en-ar",
    device=_pipeline_device,
)
def text_to_speech_english(story_text):
    """Narrate ``story_text`` in English and return the path of the audio file.

    The text is synthesized with the module-level ``narrator`` pipeline and
    written with soundfile to ``story_english.wav``; the same file name is
    used on every call.
    """
    output_path = "story_english.wav"
    speech = narrator(story_text)
    # The pipeline result exposes the waveform under 'audio' and its rate under
    # 'sampling_rate'; squeeze removes any singleton dimensions before writing.
    waveform = np.squeeze(speech['audio'])
    rate = speech['sampling_rate']
    sf.write(output_path, waveform, rate)
    return output_path
def text_to_speech_arabic(story_text):
    """Narrate ``story_text`` in Arabic with gTTS and return the MP3 path.

    The audio is saved to ``story_arabic.mp3``; the same file name is used on
    every call.
    """
    output_path = "story_arabic.mp3"
    gTTS(text=story_text, lang='ar').save(output_path)
    return output_path
def translate_story_to_arabic(story_text):
    """Translate a story to Arabic one sentence at a time.

    The text is split into sentences, each sentence is passed to the
    module-level ``translator_en_to_ar`` pipeline, and the translations are
    joined with single spaces.

    Args:
        story_text: The story to translate. ``None``, empty, or
            whitespace-only input yields an empty string without calling the
            translation model.

    Returns:
        The translated story as one string.
    """
    if not story_text:
        return ''
    # Split after sentence-ending punctuation followed by whitespace. The Latin
    # '?' is included alongside the Arabic '؟' because the stories being
    # translated are written in English.
    sentences = re.split(r'(?<=[.!?؟])\s+', story_text.strip())
    # Skip empty fragments so the model is never asked to translate ''.
    translated_sentences = [
        translator_en_to_ar(sentence)[0]['translation_text']
        for sentence in sentences
        if sentence
    ]
    return ' '.join(translated_sentences)
def fetch_image_from_url(url, timeout=10):
    """Download ``url`` and open the response body as a PIL image.

    This is a best-effort helper: any failure is reported with ``print`` and
    signalled by returning ``None`` so callers can skip the image.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled host would block the calling request forever.

    Returns:
        The opened ``PIL.Image.Image``, or ``None`` if the URL is empty, the
        request fails, or the body cannot be opened as an image.
    """
    if not url:
        print(f"Error fetching image from {url}: empty URL")
        return None
    try:
        # The context manager closes the streamed response on every path,
        # including when raise_for_status() rejects the reply.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # Reject 4xx/5xx responses
            # response.content is fully read into memory here, so the image
            # stays usable after the response is closed.
            return Image.open(BytesIO(response.content))
    except Exception as e:
        # Deliberately broad: one unreachable or corrupt image must not abort
        # the caller's loop over the remaining paintings.
        print(f"Error fetching image from {url}: {str(e)}")
        return None
def process_best_match(best_match, language):
    """Build the (image URL, story HTML, narration file) result for a match.

    Args:
        best_match: Row of the paintings table; its "image_url" and "Story"
            entries are read.
        language: "Arabic" or "ar" selects the Arabic output; any other value
            produces the English output.

    Returns:
        A tuple ``(image_url, info_html, audio_file)``.
    """
    image_url = best_match["image_url"]
    story = best_match["Story"]

    if language in ("Arabic", "ar"):
        # Arabic: translate first, render right-to-left, narrate with gTTS.
        arabic_story = translate_story_to_arabic(story)
        html = f"<div dir='rtl' style='font-size: 18px; color: white; font-family: Arial, sans-serif;'>{arabic_story}</div>"
        narration = text_to_speech_arabic(arabic_story)
    else:
        # Everything else is shown and narrated in English.
        html = f"<div style='font-size: 18px; color: white;'>{story}</div>"
        narration = text_to_speech_english(story)

    return image_url, html, narration
# CLIP embeddings of the reference paintings, keyed by image URL. Filled lazily
# by compare_images so each painting is downloaded and embedded at most once
# per process instead of on every search. Failed downloads are not stored, so
# they are retried on the next search.
_DB_IMAGE_FEATURES = {}


def compare_images(image, language):
    """Find the painting most similar to an uploaded image.

    The upload and every reference painting in ``df`` are embedded with CLIP
    and compared by cosine similarity; the best-scoring painting is handed to
    ``process_best_match``.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.
        language: Output language, forwarded to ``process_best_match``.

    Returns:
        A tuple ``(image_url, info_html, audio_file)`` on success, or
        ``(None, error_message, None)`` on failure.
    """
    if image is None:
        return None, "Error: Please upload an image before searching.", None
    try:
        # Inference only: no_grad avoids building autograd graphs for the
        # embeddings (which are also kept in the cache).
        with torch.no_grad():
            inputs = processor(images=image, return_tensors="pt")
            inputs = {k: v.to(device) for k, v in inputs.items()}
            image_features = model.get_image_features(**inputs)

            # Cosine similarity is never below -1, so -2.0 is beaten by any
            # real score.
            best_score = -2.0
            best_match_idx = None
            for idx, image_url in enumerate(df['image_url']):
                db_image_features = _DB_IMAGE_FEATURES.get(image_url)
                if db_image_features is None:
                    db_image = fetch_image_from_url(image_url)
                    if db_image is None:
                        # Unreachable or invalid reference image: skip it.
                        continue
                    db_inputs = processor(images=db_image, return_tensors="pt")
                    db_inputs = {k: v.to(device) for k, v in db_inputs.items()}
                    db_image_features = model.get_image_features(**db_inputs)
                    _DB_IMAGE_FEATURES[image_url] = db_image_features
                similarity = torch.nn.functional.cosine_similarity(
                    image_features, db_image_features
                ).item()
                if similarity > best_score:
                    best_score = similarity
                    best_match_idx = idx

        if best_match_idx is None:
            return None, "Error: No valid image match found in the database.", None
        best_match = df.iloc[best_match_idx]
        return process_best_match(best_match, language)
    except UnidentifiedImageError:
        return None, "Error: The uploaded file is not a valid image.", None
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return None, f"Error: {str(e)}", None
def compare_description(input_text):
    """Find the painting whose description best matches ``input_text``.

    The language of the input is detected; Arabic input is translated to
    English before matching. The input and all descriptions in ``df`` are
    embedded with the sentence-embedding model, and the description with the
    highest cosine similarity wins. The detected language code is forwarded to
    ``process_best_match``, so only input detected as 'ar' produces Arabic
    output.

    Args:
        input_text: Free-text description in English or Arabic.

    Returns:
        A tuple ``(image_url, info_html, audio_file)`` on success, or
        ``(None, error_message, None)`` on failure.
    """
    # Reject empty input up front: the language detector cannot classify it and
    # would otherwise surface an opaque library error to the user.
    if not input_text or not input_text.strip():
        return None, "Error: Please enter a description of the painting.", None
    try:
        language = detect(input_text)  # Detect the language of the input
        if language == 'ar':
            input_text = translator_ar_to_en(input_text)[0]['translation_text']
        input_embedding = semantic_model.encode(input_text, convert_to_tensor=True)
        df_embeddings = semantic_model.encode(df["Description"].tolist(), convert_to_tensor=True)
        similarities = util.pytorch_cos_sim(input_embedding, df_embeddings).squeeze()
        best_match_idx = torch.argmax(similarities).item()
        best_match = df.iloc[best_match_idx]
        return process_best_match(best_match, language)
    except Exception as e:
        # UI boundary: report the failure to the user instead of crashing.
        return None, f"Error: {str(e)}", None
# Custom CSS for styling the Gradio interface
custom_css = """
.gradio-container {
background-image: url('https://images.squarespace-cdn.com/content/v1/587ee1eab3db2b428f68d221/1626734192415-LI75A3LVVFMJD5TVZ3HR/Gallery+2.jpg');
background-size: cover;
background-position: center;
background-repeat: no-repeat;
color: #333333;
font-family: 'Arial', sans-serif;
}
h1, #title, #description {
color: white !important;
}
#upload-text, #description-search-text {
color: white !important;
}
label, .gr-label {
color: #333333 !important;
}
button.primary {
background-color: #6A5ACD;
color: black;
border-radius: 10px;
padding: 10px;
margin: 5px;
font-size: 18px;
border: none;
transition: background-color 0.3s;
}
button.primary:hover {
background-color: #836FFF;
}
#image_output, #search_image_output {
border: 3px solid white;
border-radius: 10px;
}
/* Specifically targeting the example buttons */
.gr-examples button {
color: white !important;
background-color: transparent !important; /* Make the background blend in with the overall theme */
border: 1px solid white; /* Add a border if you want to highlight it */
}
"""
# Sample examples for the image tab: each row is [image URL, narration language]
image_upload_examples = [
["https://pbs.twimg.com/media/DgAnD-FUcAAr3NT?format=jpg", "English"],
["https://pbs.twimg.com/media/DgAnD-FUcAAr3NT?format=jpg", "Arabic"]
]
# Sample examples for the "Description Search" tab
# NOTE(review): each row carries a second (language) value, but the tab wires
# these examples to a single text input - confirm the extra column is intended.
description_search_examples = [
["Woman with a mysterious smile.", "English"],
["امرأة بابتسامة غامضة.", "Arabic"]
]
# Gradio interface with two tabs:
#   * "Image Search": upload an image of a painting to get its story.
#   * "Description Search": describe a painting in words to get its story.
# Both tabs show the matched painting, its story as HTML, and an audio narration.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("<h1 id='title'>Welcome to the Virtual Art Museum</h1>")
    gr.Markdown("<p id='description'>Explore the most famous artworks. Upload an image or enter a description to learn about the story behind each piece.</p>")

    with gr.Tab("Image Search"):
        gr.Markdown("<h2 id='upload-text'>Upload Art to Recognize and Hear the Story Behind It</h2>")
        image_input = gr.Image(type="pil", label="Upload an image of an art piece")
        language_selector = gr.Radio(choices=["English", "Arabic"], label="Select Language for Story Narration", value="English")
        recognize_button = gr.Button("Search")
        image_output = gr.Image(label="Matched Art Piece", elem_id="image_output")
        description_output = gr.HTML(label="Art Piece Information")
        audio_output = gr.Audio(label="Narration of the Story")
        recognize_button.click(compare_images, inputs=[image_input, language_selector], outputs=[image_output, description_output, audio_output])
        gr.Examples(examples=image_upload_examples, inputs=[image_input, language_selector])

    with gr.Tab("Description Search"):
        gr.Markdown("<h2 id='description-search-text'>Description Search</h2>")
        description_input = gr.Textbox(label="Enter a description (in English or Arabic)")
        search_button = gr.Button("Search")
        search_image_output = gr.Image(label="Matched Art Piece", elem_id="search_image_output")
        search_description_output = gr.HTML(label="Art Piece Information")
        search_audio_output = gr.Audio(label="Narration of the Story")
        search_button.click(compare_description, inputs=description_input, outputs=[search_image_output, search_description_output, search_audio_output])
        # This tab has a single text input (the language is detected from the
        # text), so only the first column of each example row is passed on.
        gr.Examples(
            examples=[[row[0]] for row in description_search_examples],
            inputs=description_input,
        )

demo.launch()