# Imports
import os
import re
import subprocess
from io import BytesIO

import gradio as gr
import numpy as np
import pandas as pd
import requests
import soundfile as sf
import torch
from gtts import gTTS
from langdetect import detect
from PIL import Image, UnidentifiedImageError
from sentence_transformers import SentenceTransformer, util
from transformers import CLIPProcessor, CLIPModel, pipeline

# Run the setup script to install espeak-ng.
# check=True aborts start-up if the script exits with a non-zero status.
subprocess.run(['bash', 'setup.sh'], check=True)

# Painting catalogue: one record per painting holding its image URL, title,
# description (matched against free-text searches) and story (shown and narrated).
_PAINTINGS = [
    {
        "image_url": "https://s.turbifycdn.com/aah/gallerydirectart/vincent-van-gogh-estate-signed-limited-edition-giclee-starry-night-47.png",
        "Title": "Starry Night",
        "Description": (
            "Starry Night by Vincent van Gogh, painted in 1889, is one of the most famous works of art in the world. "
            "It depicts a swirling night sky filled with stars over a small town. The painting uses vibrant colors like blue and yellow, "
            "with exaggerated swirling patterns that create a dreamlike, almost chaotic feeling."
        ),
        "Story": (
            "Vincent van Gogh painted 'Starry Night' while in a mental asylum in Saint-Rémy-de-Provence, France. "
            "It was created from memory and imagination, rather than a direct view from his window. The swirling patterns "
            "are thought to represent his emotional turbulence at the time. The painting is celebrated for its bold brushstrokes "
            "and imaginative use of color, representing the tension between beauty and chaos in the natural world."
        ),
    },
    {
        "image_url": "https://cdn.mos.cms.futurecdn.net/xRqbwS4odpkSQscn3jHECh-1200-80.jpg",
        "Title": "Mona Lisa",
        "Description": (
            "The Mona Lisa by Leonardo da Vinci, painted between 1503 and 1506, is a portrait of a woman with a subtle, enigmatic smile. "
            "The use of muted colors, including soft browns, greens, and black, emphasizes the serene and mysterious nature of the subject. "
            "It is one of the most studied and recognized works of art in history."
        ),
        "Story": (
            "'Mona Lisa' was painted by Leonardo da Vinci during the Renaissance period. The subject of the painting, "
            "believed to be Lisa Gherardini, is famed for her mysterious smile. The painting's sfumato technique, blending "
            "soft transitions between light and shadow, creates a lifelike, three-dimensional appearance. The Mona Lisa has inspired "
            "countless studies and interpretations over the centuries, and its theft in 1911 only increased its mystique."
        ),
    },
    {
        "image_url": "https://upload.wikimedia.org/wikipedia/en/d/dd/The_Persistence_of_Memory.jpg",
        "Title": "The Persistence of Memory",
        "Description": (
            "The Persistence of Memory, created by Salvador Dalí in 1931, features melting clocks draped over a surreal landscape. "
            "The painting, primarily in soft shades of brown, blue, and yellow, explores themes of time and memory. The abstract shapes "
            "and dreamlike atmosphere make it one of Dalí’s most famous surrealist works."
        ),
        "Story": (
            "Salvador Dalí's 'The Persistence of Memory' is a surrealist masterpiece that reflects the fluidity of time and memory. "
            "The melting clocks draped over the landscape suggest the passage of time becoming meaningless. The inspiration for the painting "
            "came from a melting camembert cheese. Dalí’s fascination with dream states and Freud's theories of the unconscious mind "
            "are evident in this strange, dreamlike scene."
        ),
    },
    {
        "image_url": "https://static.wixstatic.com/media/1071a8_cf1930f883e043e28d03d5a26a5960ef~mv2.jpg/v1/fill/w_568,h_718,al_c,q_85,usm_0.66_1.00_0.01,enc_auto/1071a8_cf1930f883e043e28d03d5a26a5960ef~mv2.jpg",
        "Title": "The Scream",
        "Description": (
            "The Scream by Edvard Munch, painted in 1893, is one of the most iconic images in modern art. "
            "It depicts a figure standing on a bridge, clutching their face in agony, as a blood-red sky swirls behind them. "
            "The painting uses bold reds, oranges, and blues to evoke a sense of horror and existential despair."
        ),
        "Story": (
            "'The Scream' by Edvard Munch is a vivid expression of anxiety and existential dread. Munch was inspired to create the work "
            "after a walk during which he felt the 'great scream' of nature overwhelm him. The distorted figure and fiery red sky reflect "
            "Munch’s inner turmoil. The painting has become an iconic representation of human anxiety and has been widely referenced in pop culture."
        ),
    },
    {
        "image_url": "https://images.artbrokerage.com/artthumb/magritte_158194_1/625x559/Rene_Magritte_Le_Fils_De_lhomme_the_Son_of_Man_1973.jpg",
        "Title": "The Son of Man",
        "Description": (
            "The Son of Man by René Magritte, painted in 1964, is a surrealist self-portrait of the artist. "
            "It depicts a man in a bowler hat and suit, with his face obscured by a floating green apple. "
            "The background features a cloudy sky and a low wall, contributing to the dreamlike atmosphere. The painting is rich in symbolism, "
            "exploring themes of identity, concealment, and perception."
        ),
        "Story": (
            "René Magritte’s 'The Son of Man' is a quintessential example of surrealism, blending reality and fantasy. "
            "The painting is a self-portrait with Magritte’s face hidden by a hovering green apple, symbolizing the tension between what is visible "
            "and what is hidden. The painting has been widely interpreted as a statement on identity and the nature of perception."
        ),
    },
    {
        "image_url": "https://www.artic.edu/iiif/2/25c31d8d-21a4-9ea1-1d73-6a2eca4dda7e/full/843,/0/default.jpg",
        "Title": "The Bedroom",
        "Description": (
            "The Bedroom by Vincent van Gogh, painted in 1888, depicts the artist’s simple bedroom in Arles, France. "
            "The painting uses bold, contrasting colors—yellow, red, and blue—to create a vibrant, almost childlike view of the space. "
            "Van Gogh painted this scene three times, each version representing his sense of comfort and sanctuary in his personal space."
        ),
        "Story": (
            "'The Bedroom' by Vincent van Gogh is a reflection of the artist’s longing for stability and tranquility. "
            "The painting was created during one of the few peaceful periods in van Gogh’s turbulent life, and the vibrant colors convey "
            "his emotions at the time. The bold, contrasting colors and exaggerated perspective make the simple room appear almost alive."
        ),
    },
    {
        "image_url": "https://images.desenio.com/zoom/17047_1.jpg",
        "Title": "Girl with a Pearl Earring",
        "Description": (
            "Girl with a Pearl Earring by Johannes Vermeer, painted in 1665, is often referred to as the 'Mona Lisa of the North.' "
            "The painting shows a young girl looking over her shoulder, wearing a large pearl earring. The use of light and shadow, "
            "combined with soft colors like blue and yellow, creates a lifelike, intimate portrait."
        ),
        "Story": (
            "'Girl with a Pearl Earring' by Johannes Vermeer is one of the most enigmatic portraits in Western art. Known for its simplicity and elegance, "
            "the painting captures a fleeting moment of connection between the viewer and the subject. The girl’s mysterious gaze and the radiant light "
            "on her face have captivated audiences for centuries."
        ),
    },
    {
        "image_url": "https://www.hastingsindependentpress.co.uk/wp-content/uploads/2021/03/Whistlers-Mother.jpg",
        "Title": "Whistler’s Mother",
        "Description": (
            "Whistler’s Mother by James McNeill Whistler, painted in 1871, is a portrait of the artist’s mother seated in profile. "
            "The painting uses muted tones of black, gray, and brown, reflecting the simplicity and dignity of the subject. "
            "It has become an icon of motherhood and restraint."
        ),
        "Story": (
            "James McNeill Whistler’s 'Arrangement in Grey and Black No. 1,' more commonly known as 'Whistler’s Mother,' is a stark, dignified portrait "
            "of the artist’s mother. The painting is renowned for its minimalist composition and restrained use of color. Its iconic status grew after "
            "its display at the Musée d'Orsay in Paris, becoming a symbol of maternal devotion and calm."
        ),
    },
    {
        "image_url": "https://live.staticflickr.com/7173/6713746433_652c3d9d4e_c.jpg",
        "Title": "The Basket of Apples",
        "Description": (
            "The Basket of Apples by Paul Cézanne, painted around 1895, is a still life that challenges traditional perspectives. "
            "The painting shows a table with a basket of apples, a bottle, and bread. The use of soft colors, including browns, reds, and greens, "
            "along with the tilted angles, makes the objects seem to float, blurring the line between realism and abstraction."
        ),
        "Story": (
            "Paul Cézanne’s 'The Basket of Apples' is a revolutionary work that defies the traditional rules of perspective. By tilting objects at different angles, "
            "Cézanne challenges the viewer’s perception of space and reality. This still life is often cited as a precursor to Cubism, and its soft color palette "
            "creates a serene yet dynamic composition."
        ),
    },
]

# Column-oriented view of the catalogue, kept under the original name `data`.
data = {
    column: [painting[column] for painting in _PAINTINGS]
    for column in ("image_url", "Title", "Description", "Story")
}
df = pd.DataFrame(data)

# Load models

# Use the GPU (CUDA) when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# The translation pipelines below are given an integer device index instead
# of the device string: 0 for the first GPU, -1 for CPU.
_pipeline_device_index = 0 if device == "cuda" else -1

# English text-to-speech model
narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)

# CLIP model and its processor, used for image-to-image matching
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT).to(device)
processor = CLIPProcessor.from_pretrained(_CLIP_CHECKPOINT)

# Sentence-embedding model used for the description search
semantic_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', device=device)

# Translation models: Arabic -> English (queries) and English -> Arabic (stories)
translator_ar_to_en = pipeline(
    "translation_ar_to_en",
    model="Helsinki-NLP/opus-mt-ar-en",
    device=_pipeline_device_index,
)
translator_en_to_ar = pipeline(
    "translation_en_to_arabic",
    model="Helsinki-NLP/opus-mt-en-ar",
    device=_pipeline_device_index,
)

# Function to convert the text to speech in English
def text_to_speech_english(story_text):
    """Narrate ``story_text`` in English and return the path of the WAV file.

    The module-level ``narrator`` pipeline produces a dict with ``'audio'``
    and ``'sampling_rate'``; the audio is squeezed and written with
    ``soundfile`` to ``story_english.wav`` (overwritten on every call).
    """
    audio_output = narrator(story_text)
    # Squeeze away singleton dimensions before writing the samples.
    audio = np.squeeze(audio_output['audio'])
    sampling_rate = audio_output['sampling_rate']
    sf.write("story_english.wav", audio, sampling_rate)
    return "story_english.wav"


def text_to_speech_arabic(story_text):
    """Narrate ``story_text`` in Arabic with gTTS and return the MP3 path.

    The audio is saved to ``story_arabic.mp3`` (overwritten on every call).
    """
    tts = gTTS(text=story_text, lang='ar')
    tts.save("story_arabic.mp3")
    return "story_arabic.mp3"


# Sentence boundary: whitespace that follows '.', '!', '?' or the Arabic
# question mark. The ASCII '?' matters because the text being split is English.
_SENTENCE_BOUNDARY = re.compile(r'(?<=[.!?؟])\s+')


def translate_story_to_arabic(story_text):
    """Translate an English story to Arabic one sentence at a time.

    The story is split at sentence boundaries, each non-empty sentence is
    passed through ``translator_en_to_ar``, and the translations are joined
    with single spaces.

    Returns:
        The translated story as one string ("" when there is nothing to
        translate).
    """
    sentences = [s for s in _SENTENCE_BOUNDARY.split(story_text) if s.strip()]
    translated_sentences = [
        translator_en_to_ar(sentence)[0]['translation_text']
        for sentence in sentences
    ]
    return ' '.join(translated_sentences)


# Upper bound (seconds) for each catalogue-image download, so that a stalled
# host cannot block a search forever.
_REQUEST_TIMEOUT_SECONDS = 15


def fetch_image_from_url(url):
    """Download ``url`` and open it as a PIL image.

    Returns:
        The opened image, or ``None`` when the request fails, times out,
        returns an error status, or the payload cannot be opened as an image.
        The failure is printed rather than raised so callers can skip the
        entry (deliberate best-effort behaviour).
    """
    try:
        response = requests.get(url, timeout=_REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()  # Treat HTTP error statuses as failures
        return Image.open(BytesIO(response.content))
    except Exception as e:
        print(f"Error fetching image from {url}: {str(e)}")
        return None


def process_best_match(best_match, language):
    """Build the UI outputs for the matched painting.

    Args:
        best_match: Catalogue row providing ``"image_url"`` and ``"Story"``.
        language: ``"Arabic"`` or ``"ar"`` selects the Arabic translation and
            narration; any other value yields English.

    Returns:
        Tuple ``(image_url, info_html, audio_file_path)``.
    """
    best_image_url = best_match["image_url"]
    best_story = best_match["Story"]

    # NOTE(review): in the reviewed source the story was surrounded only by
    # line breaks; any wrapping HTML markup appears to have been lost and
    # should be restored from the original file if it existed.
    if language in ("Arabic", "ar"):
        best_story_translated = translate_story_to_arabic(best_story)
        info_html = f"\n{best_story_translated}\n"
        audio_file = text_to_speech_arabic(best_story_translated)
        return best_image_url, info_html, audio_file

    # Otherwise, use English
    info_html = f"\n{best_story}\n"
    audio_file = text_to_speech_english(best_story)
    return best_image_url, info_html, audio_file


# CLIP features of catalogue images keyed by URL, filled lazily so that each
# image is downloaded and embedded at most once per process.
_db_image_features = {}


def _embed_image(image):
    """Return the CLIP image features for ``image`` without tracking gradients."""
    inputs = processor(images=image, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        return model.get_image_features(**inputs)


def _get_db_image_features(image_url):
    """Return cached CLIP features for a catalogue image, or ``None`` if it cannot be fetched."""
    features = _db_image_features.get(image_url)
    if features is None:
        db_image = fetch_image_from_url(image_url)
        if db_image is None:
            # Failures are not cached, so the URL is retried on the next search.
            return None
        features = _embed_image(db_image)
        _db_image_features[image_url] = features
    return features


def compare_images(image, language):
    """Match an uploaded image against the catalogue and return its story.

    The uploaded image and every reachable catalogue image are embedded with
    CLIP; the catalogue entry with the highest cosine similarity wins.

    Args:
        image: The uploaded PIL image, or ``None`` when nothing was uploaded.
        language: Narration language passed on to ``process_best_match``.

    Returns:
        ``(image_url, info_html, audio_file_path)`` on success, otherwise
        ``(None, error_message, None)``.
    """
    if image is None:
        return None, "Error: Please upload an image first.", None

    try:
        image_features = _embed_image(image)

        best_score = -2.0  # Below the minimum possible cosine similarity (-1)
        best_match_idx = None

        for idx, image_url in enumerate(df['image_url']):
            db_image_features = _get_db_image_features(image_url)
            if db_image_features is None:
                continue

            similarity = torch.nn.functional.cosine_similarity(
                image_features, db_image_features
            ).item()
            if similarity > best_score:
                best_score = similarity
                best_match_idx = idx

        if best_match_idx is None:
            return None, "Error: No valid image match found in the database.", None

        best_match = df.iloc[best_match_idx]
        return process_best_match(best_match, language)

    except UnidentifiedImageError:
        return None, "Error: The uploaded file is not a valid image.", None
    except Exception as e:
        return None, f"Error: {str(e)}", None


# Embeddings of the catalogue descriptions; computed on first use and reused,
# since the catalogue does not change while the app is running.
_description_embeddings = None


def _get_description_embeddings():
    """Return the (lazily computed) sentence embeddings of all catalogue descriptions."""
    global _description_embeddings
    if _description_embeddings is None:
        _description_embeddings = semantic_model.encode(
            df["Description"].tolist(), convert_to_tensor=True
        )
    return _description_embeddings


def compare_description(input_text):
    """Find the painting whose description best matches ``input_text``.

    The input language is detected; Arabic input is translated to English
    before being embedded and compared (cosine similarity) with the catalogue
    descriptions. The detected language code is passed to
    ``process_best_match``, so Arabic queries get an Arabic story.

    Returns:
        ``(image_url, info_html, audio_file_path)`` on success, otherwise
        ``(None, error_message, None)``.
    """
    if not input_text or not input_text.strip():
        return None, "Error: Please enter a description.", None

    try:
        language = detect(input_text)  # Detect the language of the input
        if language == 'ar':
            input_text = translator_ar_to_en(input_text)[0]['translation_text']

        input_embedding = semantic_model.encode(input_text, convert_to_tensor=True)
        df_embeddings = _get_description_embeddings()

        similarities = util.pytorch_cos_sim(input_embedding, df_embeddings).squeeze()
        best_match_idx = torch.argmax(similarities).item()

        best_match = df.iloc[best_match_idx]
        return process_best_match(best_match, language)

    except Exception as e:
        return None, f"Error: {str(e)}", None


# Custom CSS for styling the Gradio app
custom_css = """
.gradio-container {
    background-image: url('https://images.squarespace-cdn.com/content/v1/587ee1eab3db2b428f68d221/1626734192415-LI75A3LVVFMJD5TVZ3HR/Gallery+2.jpg');
    background-size: cover;
    background-position: center;
    background-repeat: no-repeat;
    color: #333333;
    font-family: 'Arial', sans-serif;
}
h1, #title, #description {
    color: white !important;
}
#upload-text, #description-search-text {
    color: white !important;
}
label, .gr-label {
    color: #333333 !important;
}
button.primary {
    background-color: #6A5ACD;
    color: black;
    border-radius: 10px;
    padding: 10px;
    margin: 5px;
    font-size: 18px;
    border: none;
    transition: background-color 0.3s;
}
button.primary:hover {
    background-color: #836FFF;
}
#image_output, #search_image_output {
    border: 3px solid white;
    border-radius: 10px;
}
/* Specifically targeting the example buttons */
.gr-examples button {
    color: white !important;
    background-color: transparent !important; /* Make the background blend in with the overall theme */
    border: 1px solid white; /* Add a border if you want to highlight it */
}
"""

# Sample examples for the "Image Search" tab: [image URL, narration language]
image_upload_examples = [
    ["https://pbs.twimg.com/media/DgAnD-FUcAAr3NT?format=jpg", "English"],
    ["https://pbs.twimg.com/media/DgAnD-FUcAAr3NT?format=jpg", "Arabic"],
]

# Sample examples for the "Description Search" tab. One column only: the tab
# has a single input component and the language is detected from the text.
description_search_examples = [
    ["Woman with a mysterious smile."],
    ["امرأة بابتسامة غامضة."],
]

# Gradio interface with two tabs:
#   "Image Search"       - get a painting's story by uploading an image
#   "Description Search" - get a painting's story by describing the painting
# NOTE(review): the CSS above targets #title, #description, #upload-text and
# #description-search-text, but no element with those ids is visible in the
# Markdown strings below; the heading markup appears to have been lost and
# should be restored from the original file if it existed.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("\n\nWelcome to the Virtual Art Museum\n\n")
    gr.Markdown(
        "\n\nExplore the most famous artworks. Upload an image or enter a "
        "description to learn about the story behind each piece.\n\n"
    )

    with gr.Tab("Image Search"):
        gr.Markdown("\n\nUpload Art to Recognize and Hear the Story Behind It\n\n")
        image_input = gr.Image(type="pil", label="Upload an image of an art piece")
        language_selector = gr.Radio(
            choices=["English", "Arabic"],
            label="Select Language for Story Narration",
            value="English",
        )
        recognize_button = gr.Button("Search")
        image_output = gr.Image(label="Matched Art Piece", elem_id="image_output")
        description_output = gr.HTML(label="Art Piece Information")
        audio_output = gr.Audio(label="Narration of the Story")
        recognize_button.click(
            compare_images,
            inputs=[image_input, language_selector],
            outputs=[image_output, description_output, audio_output],
        )
        gr.Examples(examples=image_upload_examples, inputs=[image_input, language_selector])

    with gr.Tab("Description Search"):
        gr.Markdown("\n\nDescription Search\n\n")
        description_input = gr.Textbox(label="Enter a description (in English or Arabic)")
        search_button = gr.Button("Search")
        search_image_output = gr.Image(label="Matched Art Piece", elem_id="search_image_output")
        search_description_output = gr.HTML(label="Art Piece Information")
        search_audio_output = gr.Audio(label="Narration of the Story")
        search_button.click(
            compare_description,
            inputs=description_input,
            outputs=[search_image_output, search_description_output, search_audio_output],
        )
        gr.Examples(examples=description_search_examples, inputs=description_input)

demo.launch()