Spaces: Running on Zero

youssefKadaouiAbbassi committed: Update app.py

app.py CHANGED
@@ -1,56 +1,61 @@
-import
+import os
 import torch
-import
+import cv2
 import numpy as np
-
-import spaces
-
-import PIL
-from PIL import Image
-from typing import Tuple
-
-import diffusers
-from diffusers.utils import load_image
-from diffusers.models import ControlNetModel
-from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
-
-from huggingface_hub import hf_hub_download
-
 from insightface.app import FaceAnalysis
-
-from
-from
-
+from diffusers import ControlNetModel, StableDiffusionXLInstantIDPipeline
+from diffusers.utils import load_image
+from torchvision.transforms import Compose
 from depth_anything.dpt import DepthAnything
 from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
 
-
-
+# Suppress ONNX Runtime CPU thread affinity warnings
+os.environ["ORT_DISABLE_CPU_AFFINITY"] = "1"
+
+# Ensure CUDA provider is available for ONNX
+import onnxruntime as ort
+print("Available ONNX Runtime Providers:", ort.get_available_providers())
 
 # Global variables
-MAX_SEED = np.iinfo(np.int32).max
 device = "cuda" if torch.cuda.is_available() else "cpu"
-dtype = torch.float16 if
-
-
-
-
-
-
-
-
-# Load face encoder
-app = FaceAnalysis(name="antelopev2", root="./", providers=["CPUExecutionProvider"])
+dtype = torch.float16 if device == "cuda" else torch.float32
+
+# Configure FaceAnalysis with GPU support
+app = FaceAnalysis(
+    name="antelopev2",
+    root="./",
+    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+)
 app.prepare(ctx_id=0, det_size=(640, 640))
 
-#
-depth_anything = DepthAnything.from_pretrained(
+# Initialize DepthAnything for depth map generation
+depth_anything = DepthAnything.from_pretrained("LiheYoung/depth_anything_vitl14").to(device).eval()
+
 transform = Compose([
-    Resize(width=518, height=518, resize_target=False, keep_aspect_ratio=True, ensure_multiple_of=14
+    Resize(width=518, height=518, resize_target=False, keep_aspect_ratio=True, ensure_multiple_of=14),
     NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
     PrepareForNet(),
 ])
 
+# Load ControlNet models
+controlnet_identitynet = ControlNetModel.from_pretrained("./checkpoints/ControlNetModel", torch_dtype=dtype)
+controlnet_canny = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0", torch_dtype=dtype).to(device)
+controlnet_depth = ControlNetModel.from_pretrained("diffusers/controlnet-depth-sdxl-1.0-small", torch_dtype=dtype).to(device)
+
+# Load main pipeline
+pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
+    "wangqixun/YamerMIX_v8",
+    controlnet=[controlnet_identitynet],
+    torch_dtype=dtype,
+    safety_checker=None,
+    feature_extractor=None,
+).to(device)
+
+pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
+pipe.load_ip_adapter_instantid("./checkpoints/ip-adapter.bin")
+pipe.cuda()
+
+# Utility functions
 def get_depth_map(image):
     image = np.array(image) / 255.0
     h, w = image.shape[:2]
@@ -58,91 +63,63 @@ def get_depth_map(image):
     image = torch.from_numpy(image).unsqueeze(0).to(device)
     with torch.no_grad():
         depth = depth_anything(image)
-    depth =
-    depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
-    return Image.fromarray(depth
+    depth = torch.nn.functional.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
+    depth = ((depth - depth.min()) / (depth.max() - depth.min()) * 255.0).cpu().numpy().astype(np.uint8)
+    return Image.fromarray(depth)
 
 def get_canny_image(image, t1=100, t2=200):
     image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     edges = cv2.Canny(image, t1, t2)
     return Image.fromarray(edges, "L")
 
-#
-controlnet_path = "./checkpoints/ControlNetModel"
-controlnet_identitynet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)
-
-controlnet_canny_model = "diffusers/controlnet-canny-sdxl-1.0"
-controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"
-controlnet_canny = ControlNetModel.from_pretrained(controlnet_canny_model, torch_dtype=dtype).to(device)
-controlnet_depth = ControlNetModel.from_pretrained(controlnet_depth_model, torch_dtype=dtype).to(device)
-
+# Map for controlnet preprocessing
 controlnet_map = {
     "canny": controlnet_canny,
     "depth": controlnet_depth,
 }
+
 controlnet_map_fn = {
     "canny": get_canny_image,
     "depth": get_depth_map,
 }
 
-#
-
-
-
-    controlnet=[controlnet_identitynet],
-    torch_dtype=dtype,
-    safety_checker=None,
-    feature_extractor=None,
-).to(device)
-
-pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
-pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
-pipe.disable_lora()
-pipe.cuda()
-pipe.load_ip_adapter_instantid("./checkpoints/ip-adapter.bin")
-
-@spaces.GPU
-def generate_image(
-    face_image_path, pose_image_path, prompt, negative_prompt, style_name, num_steps,
-    identitynet_strength_ratio, adapter_strength_ratio, canny_strength, depth_strength,
-    controlnet_selection, guidance_scale, seed, scheduler, enable_LCM, enhance_face_region,
-):
-    if enable_LCM:
-        pipe.scheduler = diffusers.LCMScheduler.from_config(pipe.scheduler.config)
-        pipe.enable_lora()
-    else:
-        pipe.disable_lora()
-        scheduler_class_name = scheduler.split("-")[0]
-        scheduler = getattr(diffusers, scheduler_class_name)
-        pipe.scheduler = scheduler.from_config(pipe.scheduler.config)
-
-    # Image preprocessing
-    face_image = load_image(face_image_path)
-    face_image = resize_img(face_image, max_side=1024)
-    face_image_cv2 = cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR)
-    face_info = app.get(face_image_cv2)
-
+# Generate image function
+def generate_image(face_image_path, controlnet_selection, prompt, negative_prompt, num_steps, guidance_scale, seed):
+    face_image = load_image(face_image_path).resize((1024, 1024))
+    face_info = app.get(np.array(face_image))
     if not face_info:
-        raise ValueError("No face detected in the
-
-
-
-
-
-
-
-
-        selected_models = [controlnet_map[s] for s in controlnet_selection]
-        selected_conditions = [controlnet_map_fn[s](face_image) for s in controlnet_selection]
-        control_images += selected_conditions
-        control_scales += [canny_strength, depth_strength]
+        raise ValueError("No face detected in the image!")
+
+    control_images = []
+    for control_type in controlnet_selection:
+        if control_type in controlnet_map_fn:
+            control_images.append(controlnet_map_fn[control_type](face_image))
+
+    pipe.controlnet = [controlnet_identitynet] + [controlnet_map[control_type] for control_type in controlnet_selection]
+    generator = torch.manual_seed(seed)
 
-    pipe.controlnet = MultiControlNetModel([controlnet_identitynet] + selected_models)
-
-    generator = torch.Generator(device=device).manual_seed(seed)
     output = pipe(
-        prompt=prompt,
-
-
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        image=control_images,
+        controlnet_conditioning_scale=1.0,
+        guidance_scale=guidance_scale,
+        num_inference_steps=num_steps,
+        generator=generator,
    )
     return output.images[0]
+
+# Example usage
+if __name__ == "__main__":
+    face_image_path = "./examples/yann-lecun_resize.jpg"
+    controlnet_selection = ["canny", "depth"]
+    prompt = "A person in vibrant colors"
+    negative_prompt = "(low quality, blurry)"
+    num_steps = 30
+    guidance_scale = 7.5
+    seed = 42
+
+    output_image = generate_image(
+        face_image_path, controlnet_selection, prompt, negative_prompt, num_steps, guidance_scale, seed
+    )
+    output_image.show()
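Note: the updated file still calls Image.fromarray (in get_depth_map and get_canny_image) and diffusers.EulerDiscreteScheduler, but neither PIL.Image nor the top-level diffusers module appears in the new import block shown above. Assuming they are not imported in an unchanged part of the file outside these hunks, the new version would also need something like:

    # Hypothetical follow-up: imports the code above appears to rely on
    from PIL import Image   # Image.fromarray() in get_depth_map / get_canny_image
    import diffusers        # diffusers.EulerDiscreteScheduler.from_config(...)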
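The commit also switches FaceAnalysis from CPUExecutionProvider to requesting CUDAExecutionProvider and prints the globally available ONNX Runtime providers. A minimal sketch for confirming which provider each antelopev2 sub-model actually received after app.prepare(), assuming insightface exposes the underlying onnxruntime InferenceSession as model.session (an assumption, not something shown in this diff):

    # Assumption: each insightface sub-model wraps an onnxruntime.InferenceSession as `.session`
    for task_name, model in app.models.items():
        print(task_name, model.session.get_providers())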