import torch
from transformers import (
    SegformerImageProcessor,
    SegformerForSemanticSegmentation,
    DPTImageProcessor,
    DPTForDepthEstimation,
)
from PIL import Image, ImageFilter
import numpy as np
import gradio as gr
import cv2

# Suppress specific warnings
import warnings

warnings.filterwarnings("ignore", category=UserWarning, module="transformers")

# Load pre-trained models and processors
seg_processor = SegformerImageProcessor.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
seg_model = SegformerForSemanticSegmentation.from_pretrained("nvidia/segformer-b0-finetuned-ade-512-512")
depth_processor = DPTImageProcessor.from_pretrained("Intel/dpt-large")
depth_model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")
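
# A possible alternative for the "lens blur" step inside process_image below:
# convolving with a normalized disk kernel produces rounder, more bokeh-like
# highlights than a Gaussian. This helper is a sketch (the name and default
# radius are our own choices, not part of the original app) and could be
# swapped in for the cv2.GaussianBlur call.
def disk_blur(image_bgr, radius=10):
    size = 2 * radius + 1
    # Build a filled circle and normalize so the kernel sums to 1
    kernel = np.zeros((size, size), dtype=np.float32)
    cv2.circle(kernel, (radius, radius), radius, 1.0, -1)
    kernel /= kernel.sum()
    # ddepth=-1 keeps the output in the input's bit depth
    return cv2.filter2D(image_bgr, -1, kernel)
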
def process_image(image):
    # Ensure image is in RGB
    if image.mode != "RGB":
        image = image.convert("RGB")
    # Resize the image to 512x512
    image = image.resize((512, 512))

    # ------------------ Semantic Segmentation ------------------
    seg_inputs = seg_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        seg_outputs = seg_model(**seg_inputs)
    # SegFormer predicts logits at 1/4 of the input resolution, so upsample
    # them back to the image size before taking the argmax
    seg_logits = torch.nn.functional.interpolate(
        seg_outputs.logits, size=image.size[::-1], mode="bilinear", align_corners=False
    )
    segmentation = torch.argmax(seg_logits, dim=1)[0].numpy()
    # Create binary mask for the 'person' class (ADE20K class index 12)
    person_class_index = 12
    binary_mask = (segmentation == person_class_index).astype(np.uint8) * 255
    binary_mask_image = Image.fromarray(binary_mask)

    # ------------------ Depth Estimation ------------------
    depth_inputs = depth_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        depth_outputs = depth_model(**depth_inputs)
    predicted_depth = depth_outputs.predicted_depth[0].cpu().numpy()
    # Normalize the depth map to [0, 1] for visualization; the small epsilon
    # guards against division by zero on a constant depth map
    min_depth = predicted_depth.min()
    max_depth = predicted_depth.max()
    normalized_depth = (predicted_depth - min_depth) / (max_depth - min_depth + 1e-8)
    depth_map_image = Image.fromarray((normalized_depth * 255).astype(np.uint8))

    # ------------------ Gaussian Blurred Background Effect ------------------
    # Invert the depth map so distant regions receive the strongest blur
    inverted_depth = 1 - normalized_depth
    inverted_depth = (inverted_depth - inverted_depth.min()) / (inverted_depth.max() - inverted_depth.min() + 1e-8)
    # Resize to the image size and add a channel axis so the weight broadcasts over RGB
    depth_weight_resized = Image.fromarray((inverted_depth * 255).astype(np.uint8)).resize((512, 512))
    depth_weight_resized = np.array(depth_weight_resized) / 255.0
    depth_weight_resized = np.expand_dims(depth_weight_resized, axis=-1)
    # Apply Gaussian blur to the entire image
    gaussian_blurred_image = image.filter(ImageFilter.GaussianBlur(radius=15))
    gaussian_blurred_np = np.array(gaussian_blurred_image).astype(np.float32)
    # Blend the sharp and blurred images per pixel using the depth weight
    original_np = np.array(image).astype(np.float32)
    composite_gaussian_np = (1 - depth_weight_resized) * original_np + depth_weight_resized * gaussian_blurred_np
    composite_gaussian_image = Image.fromarray(np.clip(composite_gaussian_np, 0, 255).astype(np.uint8))

    # ------------------ Lens Blurred Background Effect ------------------
    # Convert the PIL image to OpenCV's BGR format
    original_cv = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    # OpenCV has no direct lens-blur function, so approximate it with a wide
    # Gaussian kernel; for a rounder, more realistic bokeh, the disk_blur
    # helper defined above could be swapped in here
    lens_blur_kernel_size = 21  # Larger (odd) kernel sizes give a stronger blur
    lens_blurred_cv = cv2.GaussianBlur(original_cv, (lens_blur_kernel_size, lens_blur_kernel_size), 0)
    # Convert back to PIL Image
    lens_blurred_image = Image.fromarray(cv2.cvtColor(lens_blurred_cv, cv2.COLOR_BGR2RGB))
    lens_blurred_np = np.array(lens_blurred_image).astype(np.float32)
    # Blend with the same depth weight used for the Gaussian composite
    composite_lens_np = (1 - depth_weight_resized) * original_np + depth_weight_resized * lens_blurred_np
    composite_lens_image = Image.fromarray(np.clip(composite_lens_np, 0, 255).astype(np.uint8))

    # Return results (mask and depth map as grayscale)
    binary_mask_image = binary_mask_image.convert("L")
    depth_map_image = depth_map_image.convert("L")
    return image, binary_mask_image, depth_map_image, composite_gaussian_image, composite_lens_image
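
# A possible extension, not wired into the Gradio outputs below: the person
# mask is currently only displayed, but it could also drive the compositing so
# the subject stays fully sharp. This helper is a sketch; it assumes `mask` is
# the 0/255 uint8 array and `sharp_np`/`blurred_np` are float32 RGB arrays at
# the same 512x512 size as in process_image.
def composite_with_person_mask(mask, sharp_np, blurred_np):
    # Turn the 0/255 mask into a 0-1 weight with a channel axis for broadcasting
    person_weight = np.expand_dims(mask.astype(np.float32) / 255.0, axis=-1)
    # Keep the person from the sharp image, take the background from the blur
    blended = person_weight * sharp_np + (1 - person_weight) * blurred_np
    return Image.fromarray(np.clip(blended, 0, 255).astype(np.uint8))
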
# Define the Gradio interface
interface = gr.Interface(
    fn=process_image,
    inputs=gr.Image(type="pil", label="Upload Image"),
    outputs=[
        gr.Image(type="pil", label="Original Image"),
        gr.Image(type="pil", label="Segmentation Mask (B/W)"),
        gr.Image(type="pil", label="Depth Map"),
        gr.Image(type="pil", label="Gaussian Blurred Background"),
        gr.Image(type="pil", label="Lens Blurred Background"),
    ],
    title="Semantic Segmentation and Dual Blur Effects",
    description="Upload an image to generate a segmentation mask, depth map, Gaussian blurred background, and lens blurred background effect.",
    examples=[
        ["examples/Selfie_1.jpg"],
        ["examples/Selfie_2.jpg"],
    ],
)

# Launch the interface
if __name__ == "__main__":
    interface.launch()
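
# Quick programmatic check without the UI (a sketch; the paths come from the
# examples list above):
#     img = Image.open("examples/Selfie_1.jpg")
#     _, mask, depth, gauss, lens = process_image(img)
#     gauss.save("gaussian_composite.jpg")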