File size: 19,408 Bytes
fe4e4c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
#Imports
import gradio as gr
from PIL import Image, UnidentifiedImageError
from gtts import gTTS
import requests
import re
import torch
from transformers import CLIPProcessor, CLIPModel, pipeline
from sentence_transformers import SentenceTransformer, util
from langdetect import detect
from io import BytesIO
import pandas as pd
import numpy as np
import soundfile as sf
import os
import subprocess

# Run the setup script to install espeak-ng.
# This executes at import time, before any model is loaded. check=True makes a
# non-zero exit status raise subprocess.CalledProcessError, so start-up aborts
# if the script fails.
# NOTE(review): presumably espeak-ng is needed by the text-to-speech pipeline
# loaded below — confirm against setup.sh.
subprocess.run(['bash', 'setup.sh'], check=True)


# Catalogue of the paintings known to the app, held as four parallel lists
# (image URL, title, description, story). Entry i of every list refers to the
# same painting; the per-entry comments below name that painting. The dict is
# converted to the DataFrame `df` right after the literal.

data = {
    "image_url": [
        "https://s.turbifycdn.com/aah/gallerydirectart/vincent-van-gogh-estate-signed-limited-edition-giclee-starry-night-47.png",  # Starry Night
        "https://cdn.mos.cms.futurecdn.net/xRqbwS4odpkSQscn3jHECh-1200-80.jpg",      # Mona Lisa
        "https://upload.wikimedia.org/wikipedia/en/d/dd/The_Persistence_of_Memory.jpg",  # The Persistence of Memory
        "https://static.wixstatic.com/media/1071a8_cf1930f883e043e28d03d5a26a5960ef~mv2.jpg/v1/fill/w_568,h_718,al_c,q_85,usm_0.66_1.00_0.01,enc_auto/1071a8_cf1930f883e043e28d03d5a26a5960ef~mv2.jpg",             # The Scream
        "https://images.artbrokerage.com/artthumb/magritte_158194_1/625x559/Rene_Magritte_Le_Fils_De_lhomme_the_Son_of_Man_1973.jpg",   # The Son of Man
        "https://www.artic.edu/iiif/2/25c31d8d-21a4-9ea1-1d73-6a2eca4dda7e/full/843,/0/default.jpg",                         # The Bedroom
        "https://images.desenio.com/zoom/17047_1.jpg",  # Girl with a Pearl Earring
        "https://www.hastingsindependentpress.co.uk/wp-content/uploads/2021/03/Whistlers-Mother.jpg",        # Whistler’s Mother
        "https://live.staticflickr.com/7173/6713746433_652c3d9d4e_c.jpg"            # The Basket of Apples
    ],
    "Title": [
        "Starry Night", "Mona Lisa", "The Persistence of Memory", "The Scream",
         "The Son of Man", "The Bedroom",
        "Girl with a Pearl Earring", "Whistler’s Mother", "The Basket of Apples"
    ],
    # Short visual descriptions; these are the texts compared against the
    # user's query in compare_description.
    "Description": [
        # Starry Night
        ("Starry Night by Vincent van Gogh, painted in 1889, is one of the most famous works of art in the world. "
         "It depicts a swirling night sky filled with stars over a small town. The painting uses vibrant colors like blue and yellow, "
         "with exaggerated swirling patterns that create a dreamlike, almost chaotic feeling."),
        
        # Mona Lisa
        ("The Mona Lisa by Leonardo da Vinci, painted between 1503 and 1506, is a portrait of a woman with a subtle, enigmatic smile. "
         "The use of muted colors, including soft browns, greens, and black, emphasizes the serene and mysterious nature of the subject. "
         "It is one of the most studied and recognized works of art in history."),
        
        # The Persistence of Memory
        ("The Persistence of Memory, created by Salvador Dalí in 1931, features melting clocks draped over a surreal landscape. "
         "The painting, primarily in soft shades of brown, blue, and yellow, explores themes of time and memory. The abstract shapes "
         "and dreamlike atmosphere make it one of Dalí’s most famous surrealist works."),
        
        # The Scream
        ("The Scream by Edvard Munch, painted in 1893, is one of the most iconic images in modern art. "
         "It depicts a figure standing on a bridge, clutching their face in agony, as a blood-red sky swirls behind them. "
         "The painting uses bold reds, oranges, and blues to evoke a sense of horror and existential despair."),
        
        # The Son of Man
        ("The Son of Man by René Magritte, painted in 1964, is a surrealist self-portrait of the artist. "
         "It depicts a man in a bowler hat and suit, with his face obscured by a floating green apple. "
         "The background features a cloudy sky and a low wall, contributing to the dreamlike atmosphere. The painting is rich in symbolism, "
         "exploring themes of identity, concealment, and perception."),
        
        # The Bedroom
        ("The Bedroom by Vincent van Gogh, painted in 1888, depicts the artist’s simple bedroom in Arles, France. "
         "The painting uses bold, contrasting colors—yellow, red, and blue—to create a vibrant, almost childlike view of the space. "
         "Van Gogh painted this scene three times, each version representing his sense of comfort and sanctuary in his personal space."),
        
        # Girl with a Pearl Earring
        ("Girl with a Pearl Earring by Johannes Vermeer, painted in 1665, is often referred to as the 'Mona Lisa of the North.' "
         "The painting shows a young girl looking over her shoulder, wearing a large pearl earring. The use of light and shadow, "
         "combined with soft colors like blue and yellow, creates a lifelike, intimate portrait."),
        
        # Whistler’s Mother
        ("Whistler’s Mother by James McNeill Whistler, painted in 1871, is a portrait of the artist’s mother seated in profile. "
         "The painting uses muted tones of black, gray, and brown, reflecting the simplicity and dignity of the subject. "
         "It has become an icon of motherhood and restraint."),
        
        # The Basket of Apples
        ("The Basket of Apples by Paul Cézanne, painted around 1895, is a still life that challenges traditional perspectives. "
         "The painting shows a table with a basket of apples, a bottle, and bread. The use of soft colors, including browns, reds, and greens, "
         "along with the tilted angles, makes the objects seem to float, blurring the line between realism and abstraction.")
    ],
    # Longer background stories; this is the text shown and narrated to the
    # user by process_best_match.
    "Story": [
        # Starry Night
        ("Vincent van Gogh painted 'Starry Night' while in a mental asylum in Saint-Rémy-de-Provence, France. "
         "It was created from memory and imagination, rather than a direct view from his window. The swirling patterns "
         "are thought to represent his emotional turbulence at the time. The painting is celebrated for its bold brushstrokes "
         "and imaginative use of color, representing the tension between beauty and chaos in the natural world."),
        
        # Mona Lisa
        ("'Mona Lisa' was painted by Leonardo da Vinci during the Renaissance period. The subject of the painting, "
         "believed to be Lisa Gherardini, is famed for her mysterious smile. The painting's sfumato technique, blending "
         "soft transitions between light and shadow, creates a lifelike, three-dimensional appearance. The Mona Lisa has inspired "
         "countless studies and interpretations over the centuries, and its theft in 1911 only increased its mystique."),
        
        # The Persistence of Memory
        ("Salvador Dalí's 'The Persistence of Memory' is a surrealist masterpiece that reflects the fluidity of time and memory. "
         "The melting clocks draped over the landscape suggest the passage of time becoming meaningless. The inspiration for the painting "
         "came from a melting camembert cheese. Dalí’s fascination with dream states and Freud's theories of the unconscious mind "
         "are evident in this strange, dreamlike scene."),
        
        # The Scream
        ("'The Scream' by Edvard Munch is a vivid expression of anxiety and existential dread. Munch was inspired to create the work "
         "after a walk during which he felt the 'great scream' of nature overwhelm him. The distorted figure and fiery red sky reflect "
         "Munch’s inner turmoil. The painting has become an iconic representation of human anxiety and has been widely referenced in pop culture."),
        
        # The Son of Man
        ("René Magritte’s 'The Son of Man' is a quintessential example of surrealism, blending reality and fantasy. "
         "The painting is a self-portrait with Magritte’s face hidden by a hovering green apple, symbolizing the tension between what is visible "
         "and what is hidden. The painting has been widely interpreted as a statement on identity and the nature of perception."),
        
        # The Bedroom
        ("'The Bedroom' by Vincent van Gogh is a reflection of the artist’s longing for stability and tranquility. "
         "The painting was created during one of the few peaceful periods in van Gogh’s turbulent life, and the vibrant colors convey "
         "his emotions at the time. The bold, contrasting colors and exaggerated perspective make the simple room appear almost alive."),
        
        # Girl with a Pearl Earring
        ("'Girl with a Pearl Earring' by Johannes Vermeer is one of the most enigmatic portraits in Western art. Known for its simplicity and elegance, "
         "the painting captures a fleeting moment of connection between the viewer and the subject. The girl’s mysterious gaze and the radiant light "
         "on her face have captivated audiences for centuries."),
        
        # Whistler’s Mother
        ("James McNeill Whistler’s 'Arrangement in Grey and Black No. 1,' more commonly known as 'Whistler’s Mother,' is a stark, dignified portrait "
         "of the artist’s mother. The painting is renowned for its minimalist composition and restrained use of color. Its iconic status grew after "
         "its display at the Musée d'Orsay in Paris, becoming a symbol of maternal devotion and calm."),
        
        # The Basket of Apples
        ("Paul Cézanne’s 'The Basket of Apples' is a revolutionary work that defies the traditional rules of perspective. By tilting objects at different angles, "
         "Cézanne challenges the viewer’s perception of space and reality. This still life is often cited as a precursor to Cubism, and its soft color palette "
         "creates a serene yet dynamic composition.")
    ]
}


# One row per painting, with columns image_url, Title, Description and Story.
df = pd.DataFrame(data)

# Load models
# Everything below is loaded once at import time and shared by all requests.

# Determine if a GPU (CUDA) is available
device = "cuda" if torch.cuda.is_available() else "cpu"

# TTS model: used by text_to_speech_english to narrate the English story
narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)

# Load the CLIP model and processor (used by compare_images to embed images)
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)  
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Load the semantic similarity model for description search
semantic_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', device=device)  

# Load the translation models for Arabic to English and English to Arabic translations.
# Here the device is passed as an integer index: 0 when CUDA is available, otherwise -1.
translator_ar_to_en = pipeline("translation_ar_to_en", model="Helsinki-NLP/opus-mt-ar-en", device=0 if device == "cuda" else -1) 
translator_en_to_ar = pipeline("translation_en_to_arabic", model="Helsinki-NLP/opus-mt-en-ar", device=0 if device == "cuda" else -1)

# Function to convert the text to speech in English
def text_to_speech_english(story_text):
    """Narrate ``story_text`` with the ``narrator`` pipeline and save it as WAV.

    Args:
        story_text: English text to synthesize.

    Returns:
        The path of the written audio file, ``"story_english.wav"``.
    """
    output_path = "story_english.wav"
    synthesized = narrator(story_text)

    # Remove singleton dimensions from the audio array before writing it out.
    waveform = np.squeeze(synthesized['audio'])
    sf.write(output_path, waveform, synthesized['sampling_rate'])

    return output_path
    
# Function to convert the text to speech in Arabic using gTTS
def text_to_speech_arabic(story_text):
    """Synthesize Arabic speech for ``story_text`` with gTTS and save it as MP3.

    Args:
        story_text: Arabic text to synthesize.

    Returns:
        The path of the written audio file, ``"story_arabic.mp3"``.
    """
    output_path = "story_arabic.mp3"
    gTTS(text=story_text, lang='ar').save(output_path)
    return output_path

# Function to translate the full story in chunks
def translate_story_to_arabic(story_text):
    """Translate an English story to Arabic one sentence at a time.

    The story is split after sentence-ending punctuation and each sentence is
    passed separately to the ``translator_en_to_ar`` pipeline, so every call
    receives a short chunk instead of the whole story.

    Args:
        story_text: The English story to translate.

    Returns:
        The translated sentences joined by single spaces. An empty or
        whitespace-only story yields an empty string.
    """
    # Split on whitespace that follows ".", "!", "?" or the Arabic "؟".
    # The Latin "?" matters because the input is English text.
    sentences = re.split(r'(?<=[.!?؟])\s+', story_text)

    # Skip empty fragments (e.g. from trailing whitespace or an empty story)
    # so the translator is never called on an empty string.
    translated_sentences = [
        translator_en_to_ar(sentence)[0]['translation_text']
        for sentence in sentences
        if sentence.strip()
    ]

    return ' '.join(translated_sentences)

# Function to check if the image URL is valid and fetches the image
def fetch_image_from_url(url, timeout=10):
    """Download ``url`` and open the payload as a PIL image.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The opened ``PIL.Image.Image``, or ``None`` when the download or the
        image parsing fails (the error is printed, not raised).
    """
    try:
        # Without a timeout an unresponsive host would block the caller
        # indefinitely; the context manager releases the connection even when
        # raise_for_status() throws.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()  # Check if the request was successful
            payload = response.content
        return Image.open(BytesIO(payload))  # Return the image if valid
    except Exception as e:
        # Deliberately best-effort: callers treat None as "skip this image".
        print(f"Error fetching image from {url}: {str(e)}")
        return None

# Build the displayed result for the matched painting in the selected language
def process_best_match(best_match, language):
    """Turn a matched painting row into the values shown in the UI.

    Args:
        best_match: Row-like object providing the ``"image_url"`` and
            ``"Story"`` entries of the matched painting.
        language: ``"Arabic"`` or ``"ar"`` selects the Arabic output; any
            other value produces the English output.

    Returns:
        A tuple ``(image_url, info_html, audio_file)``: the painting's image
        URL, an HTML snippet containing the story, and the path of the
        narration audio file.
    """
    image_url = best_match["image_url"]
    story = best_match["Story"]

    # Anything that is not Arabic falls back to English.
    if language not in ("Arabic", "ar"):
        info_html = f"<div style='font-size: 18px; color: white;'>{story}</div>"
        return image_url, info_html, text_to_speech_english(story)

    # Arabic: translate first, then render right-to-left and narrate the translation.
    arabic_story = translate_story_to_arabic(story)
    info_html = f"<div dir='rtl' style='font-size: 18px; color: white; font-family: Arial, sans-serif;'>{arabic_story}</div>"
    return image_url, info_html, text_to_speech_arabic(arabic_story)

# Function to match the uploaded image with the DataFrame to retrieve the image of the painting from the DataFrame and its story in text and audio

# CLIP features of the reference paintings, keyed by image URL. Filled lazily
# so each reference image is downloaded and embedded once rather than on every
# request. Failed downloads are not stored, so they are retried next time.
_db_image_feature_cache = {}


def _embed_image(image):
    """Return the CLIP image-feature tensor for a PIL image, on ``device``."""
    inputs = processor(images=image, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # Inference only: no gradients are needed for a similarity lookup.
    with torch.no_grad():
        return model.get_image_features(**inputs)


def _get_db_image_features(image_url):
    """Return cached CLIP features for a reference image URL, or None if it cannot be fetched."""
    cached = _db_image_feature_cache.get(image_url)
    if cached is not None:
        return cached

    db_image = fetch_image_from_url(image_url)
    if db_image is None:
        return None

    features = _embed_image(db_image)
    _db_image_feature_cache[image_url] = features
    return features


def compare_images(image, language):
    """Find the catalogue painting most similar to ``image`` and build its result.

    The uploaded image and every reference image in ``df['image_url']`` are
    embedded with CLIP; the reference with the highest cosine similarity wins.

    Args:
        image: The uploaded picture (the UI supplies a PIL image), or ``None``
            when nothing was uploaded.
        language: Narration language, forwarded to ``process_best_match``.

    Returns:
        ``(image_url, info_html, audio_file)`` from ``process_best_match`` on
        success, otherwise ``(None, error_message, None)``. Exceptions are
        reported through the message rather than raised.
    """
    # Clicking "Search" without an upload passes None; report that clearly
    # instead of surfacing an error from the image processor.
    if image is None:
        return None, "Error: Please upload an image first.", None

    try:
        image_features = _embed_image(image)

        # Cosine similarity lies in [-1, 1], so -2.0 is below any real score.
        best_score = -2.0
        best_match_idx = None

        for idx, image_url in enumerate(df['image_url']):
            db_image_features = _get_db_image_features(image_url)
            if db_image_features is None:
                # Reference image could not be downloaded; skip it.
                continue

            similarity = torch.nn.functional.cosine_similarity(image_features, db_image_features).item()

            if similarity > best_score:
                best_score = similarity
                best_match_idx = idx

        if best_match_idx is None:
            return None, "Error: No valid image match found in the database.", None

        best_match = df.iloc[best_match_idx]
        return process_best_match(best_match, language)

    except UnidentifiedImageError:
        return None, "Error: The uploaded file is not a valid image.", None
    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the handler.
        return None, f"Error: {str(e)}", None

# Function to compare user input with descriptions in the DataFrame and return the best-matching painting as an image with the text and audio story of the painting

# Embeddings of df["Description"], computed on first use. The catalogue does
# not change at runtime, so they are reused across requests.
_description_embeddings = None


def _get_description_embeddings():
    """Return the (lazily computed) sentence embeddings of all catalogue descriptions."""
    global _description_embeddings
    if _description_embeddings is None:
        _description_embeddings = semantic_model.encode(df["Description"].tolist(), convert_to_tensor=True)
    return _description_embeddings


def compare_description(input_text):
    """Find the painting whose description best matches ``input_text``.

    The language of the input is detected; Arabic input is translated to
    English before it is embedded and compared (cosine similarity) with the
    catalogue descriptions. The detected language code is forwarded to
    ``process_best_match``, so Arabic queries receive an Arabic result.

    Args:
        input_text: Free-text description entered by the user.

    Returns:
        ``(image_url, info_html, audio_file)`` from ``process_best_match`` on
        success, otherwise ``(None, error_message, None)``.
    """
    # Language detection cannot work on empty text; give a clear message
    # instead of surfacing the detector's internal error.
    if not input_text or not input_text.strip():
        return None, "Error: Please enter a description.", None

    try:
        language = detect(input_text)  # detect the language of the input
        if language == 'ar':
            input_text = translator_ar_to_en(input_text)[0]['translation_text']

        input_embedding = semantic_model.encode(input_text, convert_to_tensor=True)
        df_embeddings = _get_description_embeddings()

        similarities = util.pytorch_cos_sim(input_embedding, df_embeddings).squeeze()
        best_match_idx = torch.argmax(similarities).item()
        best_match = df.iloc[best_match_idx]

        return process_best_match(best_match, language)

    except Exception as e:
        # UI boundary: show the failure to the user instead of crashing the handler.
        return None, f"Error: {str(e)}", None

# Custom CSS for styling the Gradio app.
# Passed to gr.Blocks(css=...) below. The #title, #description, #upload-text,
# #description-search-text, #image_output and #search_image_output selectors
# correspond to ids / elem_ids set where the interface is built.

custom_css = """
.gradio-container {
    background-image: url('https://images.squarespace-cdn.com/content/v1/587ee1eab3db2b428f68d221/1626734192415-LI75A3LVVFMJD5TVZ3HR/Gallery+2.jpg');
    background-size: cover;
    background-position: center;
    background-repeat: no-repeat;
    color: #333333;
    font-family: 'Arial', sans-serif;
}

h1, #title, #description {
    color: white !important;
}

#upload-text, #description-search-text {
    color: white !important;
}

label, .gr-label {
    color: #333333 !important;
}

button.primary {
    background-color: #6A5ACD;
    color: black;
    border-radius: 10px;
    padding: 10px;
    margin: 5px;
    font-size: 18px;
    border: none;
    transition: background-color 0.3s;
}

button.primary:hover {
    background-color: #836FFF;
}

#image_output, #search_image_output {
    border: 3px solid white;
    border-radius: 10px;
}

/* Specifically targeting the example buttons */
.gr-examples button {
    color: white !important;
    background-color: transparent !important; /* Make the background blend in with the overall theme */
    border: 1px solid white; /* Add a border if you want to highlight it */
}
"""

# Sample examples for the "Image Search" tab. Each row is
# [image URL, narration language], matching the two inputs given to
# gr.Examples in that tab (image_input, language_selector).
image_upload_examples = [
    ["https://pbs.twimg.com/media/DgAnD-FUcAAr3NT?format=jpg", "English"],
    ["https://pbs.twimg.com/media/DgAnD-FUcAAr3NT?format=jpg", "Arabic"]
]

# Sample Examples for the "Description Search" tab
# NOTE(review): each row carries a second value ("English"/"Arabic"), but the
# gr.Examples call for this tab passes only description_input as its input —
# confirm whether the language column is intended.
description_search_examples = [
    ["Woman with a mysterious smile.", "English"],
    ["امرأة بابتسامة غامضة.", "Arabic"]
]

# Gradio interface with two tabs: "Image Search" and "Description Search"
# The Image Search tab returns a painting's story from an uploaded image
# The Description Search tab returns a painting's story from a text description of the painting

with gr.Blocks(css=custom_css) as demo:
    # The ids on these HTML elements are targeted by custom_css.
    gr.Markdown("<h1 id='title'>Welcome to the Virtual Art Museum</h1>")
    gr.Markdown("<p id='description'>Explore the most famous artworks. Upload an image or enter a description to learn about the story behind each piece.</p>")

    with gr.Tab("Image Search"):
        gr.Markdown("<h2 id='upload-text'>Upload Art to Recognize and Hear the Story Behind It</h2>")

        # type="pil" makes Gradio hand the upload to compare_images as a PIL image.
        image_input = gr.Image(type="pil", label="Upload an image of an art piece")
        language_selector = gr.Radio(choices=["English", "Arabic"], label="Select Language for Story Narration", value="English")
        recognize_button = gr.Button("Search")  

        image_output = gr.Image(label="Matched Art Piece", elem_id="image_output")
        description_output = gr.HTML(label="Art Piece Information")
        audio_output = gr.Audio(label="Narration of the Story")

        # compare_images returns (image URL, HTML, audio path), mapped to the three outputs in order.
        recognize_button.click(compare_images, inputs=[image_input, language_selector], outputs=[image_output, description_output, audio_output])

        gr.Examples(examples=image_upload_examples, inputs=[image_input, language_selector])
    with gr.Tab("Description Search"):
        gr.Markdown("<h2 id='description-search-text'>Description Search</h2>")

        # No language selector here: compare_description detects the language from the text.
        description_input = gr.Textbox(label="Enter a description (in English or Arabic)")
        search_button = gr.Button("Search")   

        search_image_output = gr.Image(label="Matched Art Piece", elem_id="search_image_output")
        search_description_output = gr.HTML(label="Art Piece Information")
        search_audio_output = gr.Audio(label="Narration of the Story")

        # compare_description returns (image URL, HTML, audio path), mapped to the three outputs in order.
        search_button.click(compare_description, inputs=description_input, outputs=[search_image_output, search_description_output, search_audio_output])

        gr.Examples(examples=description_search_examples, inputs=description_input)

# Start the web server; this runs whenever the module is executed or imported.
demo.launch()