# glass_try_on1 / app.py
# Siyun He
# make the face detection 3D and add comments
# 58a86d0
import cv2
import cvzone
import numpy as np
import os
import gradio as gr
import mediapipe as mp
from datetime import datetime
# Load the YuNet face detector. The (320, 320) input size is only an initial
# value: process_frame_3d() calls setInputSize() with the real frame size.
model_path = 'face_detection_yunet_2023mar.onnx'
face_detector = cv2.FaceDetectorYN.create(model_path, "", (320, 320))

# Initialize MediaPipe Face Mesh (at most one face, non-static/video mode)
mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(static_image_mode=False, max_num_faces=1, min_detection_confidence=0.5)

# Index of the currently selected glasses image; files are named glass<num>.png
num = 1
overlay_bgr = cv2.imread(f'glasses/glass{num}.png', cv2.IMREAD_UNCHANGED)
# cv2.imread() reports failure by returning None rather than raising, so stop
# here with a clear message instead of an obscure error from cv2.split().
if overlay_bgr is None:
    raise FileNotFoundError(f"Could not read glasses image 'glasses/glass{num}.png'")
# The overlay needs an alpha channel; IMREAD_UNCHANGED keeps it when present.
if overlay_bgr.ndim != 3 or overlay_bgr.shape[2] != 4:
    raise ValueError(f"'glasses/glass{num}.png' must be a 4-channel (BGRA) image")
# Split the channels (OpenCV loads them in B, G, R, A order)
b, g, r, a = cv2.split(overlay_bgr)
# Merge back in RGB order so the overlay matches the frames it is drawn on
overlay_rgb = cv2.merge((r, g, b, a))
# Active overlay, read by process_frame_3d() and replaced by change_glasses()
overlay = overlay_rgb
# Count glasses files
def count_files_in_directory(directory):
    """Return how many files live under *directory*, subdirectories included.

    Every file is counted regardless of its name or extension. A directory
    that yields nothing from ``os.walk`` gives 0.
    """
    return sum(len(filenames) for _, _, filenames in os.walk(directory))
# Determine face shape
def determine_face_shape_3d(landmarks, width_indices=(0, 16), height_indices=(8, 27),
                            round_threshold=1.5, oval_threshold=1.2):
    """Classify a face from the ratio of a width distance to a height distance.

    Args:
        landmarks: Indexable collection of coordinate vectors (NumPy arrays or
            rows of a 2-D array); distances are Euclidean norms of differences.
        width_indices: Pair of landmark indices spanning the jaw width.
        height_indices: Pair of landmark indices spanning the face height.
        round_threshold: Ratios above this value are classified "Round".
        oval_threshold: Ratios below this value are classified "Oval".

    Returns:
        "Round", "Oval" or "Square"; "Unknown" when the height distance is
        zero or not finite, so no meaningful ratio exists.

    NOTE(review): the default indices (0/16 and 8/27) look like the 68-point
    landmark layout, while process_frame_3d() passes MediaPipe Face Mesh
    landmarks - confirm which points are intended before relying on the result.
    """
    width_a, width_b = width_indices
    height_a, height_b = height_indices

    # Calculate 3D distances
    jaw_width = np.linalg.norm(landmarks[width_a] - landmarks[width_b])
    face_height = np.linalg.norm(landmarks[height_a] - landmarks[height_b])

    # A degenerate height would turn the ratio into inf/nan and silently pick
    # an arbitrary branch below, so report it explicitly instead.
    if not np.isfinite(face_height) or face_height == 0:
        return "Unknown"

    # Determine face shape based on 3D proportions
    ratio = jaw_width / face_height
    if ratio > round_threshold:
        return "Round"
    if ratio < oval_threshold:
        return "Oval"
    return "Square"
# Recommend glass shape based on face shape
def recommend_glass_shape(face_shape):
    """Map a detected face shape to a suggested frame shape.

    Only an "Oval" face gets "Round" frames; every other value, including
    "Round", "Square" and unrecognised labels, gets "Square".
    """
    return "Round" if face_shape == "Oval" else "Square"
# Folder holding the overlay images that are loaded as glasses/glass<num>.png
directory_path = 'glasses'
# Number of files under that folder; change_glasses() wraps back to 1 once
# num exceeds it. Every file is counted, whatever its name or extension.
total_glass_num = count_files_in_directory(directory_path)
# Change glasses
def change_glasses():
    """Select the next glasses image and make it the active overlay.

    Advances the module-level ``num`` (wrapping to 1 after
    ``total_glass_num``) and loads ``glasses/glass<num>.png``. If that file
    cannot be read or has no alpha channel, the current overlay is kept and a
    message is printed; ``num`` stays advanced so the next call tries the
    following file.

    Returns:
        The overlay that is active after the call (RGBA channel order).
    """
    global num, overlay
    num += 1
    if num > total_glass_num:
        num = 1

    path = f'glasses/glass{num}.png'
    overlay_bgr = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    # cv2.imread() returns None instead of raising when the file is missing or
    # unreadable; total_glass_num counts every file in the folder, so a gap in
    # the numbering is possible and must not crash the click handler.
    if overlay_bgr is None or overlay_bgr.ndim != 3 or overlay_bgr.shape[2] != 4:
        print(f"Skipping {path}: not a readable 4-channel image")
        return overlay

    # OpenCV loads B, G, R, A; the frames this is drawn on use R, G, B order
    b, g, r, a = cv2.split(overlay_bgr)
    overlay = cv2.merge((r, g, b, a))
    return overlay
# Lip colours as (R, G, B): frames handled here are RGB, so the first
# component is red.
_LIP_COLORS = {
    'classic_red': (255, 0, 0),   # Classic red
    'deep_red': (139, 0, 0),      # Deep red
    'cherry_red': (205, 0, 0),    # Cherry red
    'rose_red': (204, 102, 0),    # Rose red
    'wine_red': (128, 0, 0),      # Wine red
    'brick_red': (128, 64, 0),    # Brick red
    'coral_red': (255, 128, 0),   # Coral red
    'berry_red': (153, 0, 0),     # Berry red
    'ruby_red': (255, 17, 0),     # Ruby red
    'crimson_red': (220, 20, 60),  # Crimson red
}

# Face-mesh landmark indices; each outline repeats its first point to close it.
_UPPER_LIP_INDICES = (61, 185, 40, 39, 37, 0, 267, 269, 270, 409, 291, 61)
_LOWER_LIP_INDICES = (61, 146, 91, 181, 84, 17, 314, 405, 321, 375, 291, 61)
# NOTE(review): these indices appear to trace only the lower inner-lip contour;
# confirm whether the upper inner contour should be excluded as well.
_TEETH_INDICES = (78, 95, 88, 178, 87, 14, 317, 402, 318, 324, 308, 78)


def _landmark_polygon(face_landmarks, indices, width, height):
    """Return the selected normalized landmarks as an (N, 2) int32 pixel array."""
    return np.array(
        [
            (face_landmarks.landmark[i].x * width, face_landmarks.landmark[i].y * height)
            for i in indices
        ],
        np.int32,
    )


def change_lip_color(frame, color_name='none'):
    """Paint the lips of each detected face in *frame* with a named colour.

    Args:
        frame: RGB image array (the colour table is expressed in RGB).
        color_name: Key of the colour table; 'none' or any unknown name leaves
            the frame untouched.

    Returns:
        The input frame itself when no colour is selected or no face is found,
        otherwise a new array with the lip pixels replaced by the colour.
    """
    color = _LIP_COLORS.get(color_name)
    # If 'none' (or an unknown name) is selected, return the original frame
    if color is None:
        return frame

    # The frame is already RGB, which is what MediaPipe expects, so it is
    # passed through without a channel swap.
    results = face_mesh.process(frame)
    if not results.multi_face_landmarks:
        return frame

    height, width = frame.shape[:2]
    for face_landmarks in results.multi_face_landmarks:
        upper_lip_region = _landmark_polygon(face_landmarks, _UPPER_LIP_INDICES, width, height)
        lower_lip_region = _landmark_polygon(face_landmarks, _LOWER_LIP_INDICES, width, height)
        lip_region = np.concatenate((upper_lip_region, lower_lip_region), axis=0)
        teeth_region = _landmark_polygon(face_landmarks, _TEETH_INDICES, width, height)

        # Create a mask for the lip region
        lip_mask = np.zeros((height, width), dtype=np.uint8)
        cv2.fillPoly(lip_mask, [lip_region], 255)
        # Create a mask for the teeth region
        teeth_mask = np.zeros((height, width), dtype=np.uint8)
        cv2.fillPoly(teeth_mask, [teeth_region], 255)
        # Subtract the teeth mask from the lip mask (saturating, so 0 or 255)
        final_mask = cv2.subtract(lip_mask, teeth_mask)

        # Replace the masked pixels on a copy so the caller's array is not mutated
        frame = frame.copy()
        frame[final_mask > 0] = color
    return frame
# Process frame for overlay and face shape detection
def process_frame_3d(frame):
    """Draw the active glasses overlay on each detected face and classify the face.

    Args:
        frame: RGB image array from the webcam stream, or None when no frame
            is available yet.

    Returns:
        Tuple ``(frame, face_shape, glass_shape)``. The frame is a processed
        copy of the input (or the input itself when it is None). Both labels
        are "Unknown" unless the YuNet detector and the face mesh both found
        a face.
    """
    face_shape = "Unknown"
    glass_shape = "Unknown"
    # The stream can fire before the first webcam frame exists.
    if frame is None:
        return frame, face_shape, glass_shape

    frame = np.array(frame, copy=True)
    height, width = frame.shape[:2]

    # The frame is RGB (the overlay is prepared in RGB order to match it).
    # OpenCV models follow the BGR convention, MediaPipe expects RGB, so only
    # the detector input needs a channel swap.
    face_detector.setInputSize((width, height))
    _, faces = face_detector.detect(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
    results = face_mesh.process(frame)

    if faces is None or not results.multi_face_landmarks:
        return frame, face_shape, glass_shape

    for face in faces:
        # Detection row layout: box x, y, w, h followed by five (x, y) landmarks
        w, h = face[2:4].astype(int)
        face_landmarks = face[4:14].reshape(5, 2).astype(int)
        left_eye_x, left_eye_y = face_landmarks[0]
        right_eye_x, right_eye_y = face_landmarks[1]

        eye_center_x = (left_eye_x + right_eye_x) // 2
        eye_center_y = (left_eye_y + right_eye_y) // 2

        # Tilt of the eye line; negated because image y grows downwards while
        # getRotationMatrix2D treats positive angles as counter-clockwise.
        angle = -np.degrees(np.arctan2(right_eye_y - left_eye_y, right_eye_x - left_eye_x))

        # cv2.resize rejects empty target sizes, so skip degenerate boxes.
        target_w, target_h = int(w * 1.15), int(h * 0.8)
        if target_w < 1 or target_h < 1:
            continue
        overlay_resize = cv2.resize(overlay, (target_w, target_h))

        overlay_center = (overlay_resize.shape[1] // 2, overlay_resize.shape[0] // 2)
        rotation_matrix = cv2.getRotationMatrix2D(overlay_center, angle, 1.0)
        overlay_rotated = cv2.warpAffine(
            overlay_resize, rotation_matrix,
            (overlay_resize.shape[1], overlay_resize.shape[0]),
            flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(0, 0, 0, 0)
        )

        # Centre the glasses on the midpoint between the eyes
        overlay_x = eye_center_x - overlay_rotated.shape[1] // 2
        overlay_y = eye_center_y - overlay_rotated.shape[0] // 2

        # Best effort: a failed overlay is reported and the frame is left
        # without glasses rather than aborting the stream.
        try:
            frame = cvzone.overlayPNG(frame, overlay_rotated, [overlay_x, overlay_y])
        except Exception as e:
            print(f"Error overlaying glasses: {e}")

    # The mesh result does not depend on the detector boxes, so classify once
    # here instead of repeating it for every detected box.
    for face_landmarks_mp in results.multi_face_landmarks:
        # Convert landmarks to 3D coordinates (x and z scaled by width, y by height)
        landmarks = np.array([
            (lm.x * frame.shape[1], lm.y * frame.shape[0], lm.z * frame.shape[1])
            for lm in face_landmarks_mp.landmark
        ])
        face_shape = determine_face_shape_3d(landmarks)
        glass_shape = recommend_glass_shape(face_shape)

    return frame, face_shape, glass_shape
# Transform function
def transform_cv2(frame, transform):
    """Apply one of the named image filters to an RGB frame.

    Args:
        frame: RGB image array.
        transform: One of "cartoon", "edges", "sepia", "negative", "sketch",
            "blur"; any other value returns the frame unchanged.

    Returns:
        The filtered image as a 3-channel array, or ``frame`` itself when the
        transform name is not recognised.
    """
    if transform == "cartoon":
        # prepare color
        img_color = cv2.pyrDown(cv2.pyrDown(frame))  # Reduce the resolution
        for _ in range(6):
            img_color = cv2.bilateralFilter(img_color, 9, 9, 7)  # Smoothen the image while preserving the edges
        img_color = cv2.pyrUp(cv2.pyrUp(img_color))  # Scale back up
        # pyrDown rounds odd sizes up and pyrUp doubles, so the round trip only
        # restores the original size when it is a multiple of 4. Resize so the
        # bitwise_and below always sees matching shapes.
        if img_color.shape[:2] != frame.shape[:2]:
            img_color = cv2.resize(img_color, (frame.shape[1], frame.shape[0]))

        # prepare edges
        img_edges = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)  # Convert to grayscale
        img_edges = cv2.adaptiveThreshold(
            cv2.medianBlur(img_edges, 7),
            255,
            cv2.ADAPTIVE_THRESH_MEAN_C,
            cv2.THRESH_BINARY,
            9,
            2,
        )  # Apply adaptive thresholding to get the edges
        img_edges = cv2.cvtColor(img_edges, cv2.COLOR_GRAY2RGB)  # Convert back to color

        # combine color and edges
        return cv2.bitwise_and(img_color, img_edges)

    if transform == "edges":
        # perform edge detection and expand the result to three channels
        return cv2.cvtColor(cv2.Canny(frame, 100, 200), cv2.COLOR_GRAY2BGR)

    if transform == "sepia":
        # The kernel rows produce the blue, green and red sepia channels in
        # that order from an RGB input, so the result is swapped back to RGB.
        kernel = np.array([[0.272, 0.534, 0.131],
                           [0.349, 0.686, 0.168],
                           [0.393, 0.769, 0.189]])
        img = cv2.transform(frame, kernel)
        img = np.clip(img, 0, 255)  # ensure values are within byte range
        return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    if transform == "negative":
        # apply negative effect
        return cv2.bitwise_not(frame)

    if transform == "sketch":
        # apply sketch effect (colour-dodge of the grayscale with its blurred inverse);
        # RGB2GRAY matches the channel order used by the cartoon branch
        gray = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
        inv_gray = cv2.bitwise_not(gray)
        blur = cv2.GaussianBlur(inv_gray, (21, 21), 0)
        inv_blur = cv2.bitwise_not(blur)
        img = cv2.divide(gray, inv_blur, scale=256.0)
        return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    if transform == "blur":
        # apply blur effect
        return cv2.GaussianBlur(frame, (15, 15), 0)

    return frame
def refresh_interface():
    """Return the status message for a refresh request.

    This function does not reset any component by itself: a Gradio handler
    changes a component only through the values it returns to its outputs.
    The previous ``input_img.update(value=None)`` call discarded its result,
    so it had no effect on the image, and it was dropped.
    NOTE(review): to really clear the image, wire a handler that returns None
    to ``input_img`` as an output.
    """
    # Return a message indicating the interface has been refreshed
    return "Interface refreshed!"
def save_frame(frame):
    """Write the current frame to a timestamped PNG in the working directory.

    Args:
        frame: RGB image array, or None when no frame has been captured yet.

    Returns:
        The filename that was written, or None when there is no frame to save.

    Raises:
        OSError: If OpenCV reports that the file could not be written.
    """
    # Nothing to save if the button is pressed before a frame exists.
    if frame is None:
        return None

    # cv2.imwrite expects BGR channel order, the frame is RGB
    frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
    # Create a filename from the current timestamp (one-second resolution)
    filename = f"saved_frame_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png"
    # imwrite signals failure through its return value, not an exception
    if not cv2.imwrite(filename, frame):
        raise OSError(f"Could not write {filename}")
    return filename
def webcam_input(frame, transform, lip_color):
    """Stream callback: add glasses, then apply at most one optional effect.

    The image filter runs only when no lip colour is chosen, and the lip
    colour runs only when no filter is chosen. Selecting both, or neither,
    leaves the glasses-only frame as it is.

    Returns:
        Tuple ``(frame, face_shape, glass_shape)``.
    """
    processed, face_shape, glass_shape = process_frame_3d(frame)

    filter_selected = transform != "none"
    lips_selected = lip_color != "none"

    if filter_selected and not lips_selected:
        processed = transform_cv2(processed, transform)
    elif lips_selected and not filter_selected:
        processed = change_lip_color(processed, lip_color)

    return processed, face_shape, glass_shape
# Gradio interface
with gr.Blocks(theme=gr.themes.Soft(primary_hue="purple", secondary_hue="blue")) as demo:
    gr.Markdown("<h1 style='text-align: center; font-weight: bold;'>🤓 Glasses Virtual Try-On 🕶️👓</h1>")
    with gr.Column(elem_classes=["my-column"]):
        with gr.Group(elem_classes=["my-group"]):
            gr.Markdown("<p style='text-align: left; color: purple;'>🟣You can only apply one filter at a time, either the transform filter or the lip color filter.</p>")
            # Two filters: transform and lip color
            with gr.Row():
                # Choices match the names handled by transform_cv2()
                transform = gr.Dropdown(
                    choices=["cartoon", "edges", "sepia", "negative", "sketch", "blur", "none"],
                    value="none", label="Select Filter"
                )
                # Choices match the colour names known to change_lip_color()
                lip_color = gr.Dropdown(
                    choices=["classic_red", "deep_red", "cherry_red", "rose_red", "wine_red", "brick_red", "coral_red", "berry_red", "ruby_red", "crimson_red", "none"],
                    value="none", label="Select Lip Color"
                )
            gr.Markdown("<p style='text-align: left; font-weight: bold; color: purple;'>🟣Click the Webcam icon to start the camera, and then press the record button to start the virtual try-on. If the glasses overlay isn’t showing, try moving further away from the camera.</p>")
            # Webcam image; it is both the input and the output of the stream below
            input_img = gr.Image(sources=["webcam"], type="numpy", streaming=True)
            next_button = gr.Button("Next Glasses➡️")
            gr.Markdown("<p style='text-align: left; color: purple;'>🟣Face Shape and Recommended Glass Shape</p>")
            # Face shape and recommended glass shape
            with gr.Row():
                face_shape_output = gr.Textbox(label="Detected Face Shape")
                glass_shape_output = gr.Textbox(label="Recommended Glass Shape")
            save_button = gr.Button("Save as a Picture📌")
            gr.Markdown("<p style='text-align: left; color: red;'>‼️Warning: Refresh the page after saving the picture to use the virtual try-on again.</p>")
            # Receives the filename returned by save_frame()
            download_link = gr.File(label="Download Saved Picture")
            # Run webcam_input on streamed frames (stream_every=0.1) and write the
            # processed frame back into the same image plus the two text boxes
            input_img.stream(webcam_input, [input_img, transform, lip_color], [input_img, face_shape_output, glass_shape_output], stream_every=0.1)
        with gr.Row():
            # change_glasses() swaps the module-level overlay; no inputs or outputs are wired
            next_button.click(change_glasses, [], [])
        with gr.Row():
            # Save the image currently held by input_img and offer it for download
            save_button.click(save_frame, [input_img], [download_link])
        gr.Markdown("**Reminder:** All glasses images are screenshots from Goodr, segmented using glass_segmentation_helper.py, and then manually saved to the “glasses” folder for the try-on feature.")
# Start the app only when run as a script; share=True is passed to launch()
if __name__ == "__main__":
    demo.launch(share=True)