youssefKadaouiAbbassi committed
Commit 8bf6961 · verified · 1 Parent(s): e34a11d

Update app.py

Files changed (1)
  1. app.py +82 -105
app.py CHANGED
@@ -1,56 +1,61 @@
- import cv2
  import torch
- import random
  import numpy as np
-
- import spaces
-
- import PIL
- from PIL import Image
- from typing import Tuple
-
- import diffusers
- from diffusers.utils import load_image
- from diffusers.models import ControlNetModel
- from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
-
- from huggingface_hub import hf_hub_download
-
  from insightface.app import FaceAnalysis
-
- from style_template import styles
- from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline, draw_kps
-
  from depth_anything.dpt import DepthAnything
  from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet

- import torch.nn.functional as F
- from torchvision.transforms import Compose

  # Global variables
- MAX_SEED = np.iinfo(np.int32).max
  device = "cuda" if torch.cuda.is_available() else "cpu"
- dtype = torch.float16 if torch.cuda.is_available() else torch.float32
- STYLE_NAMES = list(styles.keys())
- DEFAULT_STYLE_NAME = "Spring Festival"
-
- # Download checkpoints
- hf_hub_download(repo_id="InstantX/InstantID", filename="ControlNetModel/config.json", local_dir="./checkpoints")
- hf_hub_download(repo_id="InstantX/InstantID", filename="ControlNetModel/diffusion_pytorch_model.safetensors", local_dir="./checkpoints")
- hf_hub_download(repo_id="InstantX/InstantID", filename="ip-adapter.bin", local_dir="./checkpoints")
-
- # Load face encoder
- app = FaceAnalysis(name="antelopev2", root="./", providers=["CPUExecutionProvider"])
  app.prepare(ctx_id=0, det_size=(640, 640))

- # Depth map and transforms
- depth_anything = DepthAnything.from_pretrained('LiheYoung/depth_anything_vitl14').to(device).eval()
  transform = Compose([
-     Resize(width=518, height=518, resize_target=False, keep_aspect_ratio=True, ensure_multiple_of=14, resize_method='lower_bound', image_interpolation_method=cv2.INTER_CUBIC),
      NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
      PrepareForNet(),
  ])

  def get_depth_map(image):
      image = np.array(image) / 255.0
      h, w = image.shape[:2]
@@ -58,91 +63,63 @@ def get_depth_map(image):
      image = torch.from_numpy(image).unsqueeze(0).to(device)
      with torch.no_grad():
          depth = depth_anything(image)
-     depth = F.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
-     depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
-     return Image.fromarray(depth.cpu().numpy().astype(np.uint8))

  def get_canny_image(image, t1=100, t2=200):
      image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
      edges = cv2.Canny(image, t1, t2)
      return Image.fromarray(edges, "L")

- # ControlNet paths and mapping
- controlnet_path = "./checkpoints/ControlNetModel"
- controlnet_identitynet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)
-
- controlnet_canny_model = "diffusers/controlnet-canny-sdxl-1.0"
- controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"
- controlnet_canny = ControlNetModel.from_pretrained(controlnet_canny_model, torch_dtype=dtype).to(device)
- controlnet_depth = ControlNetModel.from_pretrained(controlnet_depth_model, torch_dtype=dtype).to(device)
-
  controlnet_map = {
      "canny": controlnet_canny,
      "depth": controlnet_depth,
  }
  controlnet_map_fn = {
      "canny": get_canny_image,
      "depth": get_depth_map,
  }

- # Stable Diffusion XL pipeline
- pretrained_model_name_or_path = "wangqixun/YamerMIX_v8"
- pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
-     pretrained_model_name_or_path,
-     controlnet=[controlnet_identitynet],
-     torch_dtype=dtype,
-     safety_checker=None,
-     feature_extractor=None,
- ).to(device)
-
- pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
- pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
- pipe.disable_lora()
- pipe.cuda()
- pipe.load_ip_adapter_instantid("./checkpoints/ip-adapter.bin")
-
- @spaces.GPU
- def generate_image(
-     face_image_path, pose_image_path, prompt, negative_prompt, style_name, num_steps,
-     identitynet_strength_ratio, adapter_strength_ratio, canny_strength, depth_strength,
-     controlnet_selection, guidance_scale, seed, scheduler, enable_LCM, enhance_face_region,
- ):
-     if enable_LCM:
-         pipe.scheduler = diffusers.LCMScheduler.from_config(pipe.scheduler.config)
-         pipe.enable_lora()
-     else:
-         pipe.disable_lora()
-         scheduler_class_name = scheduler.split("-")[0]
-         scheduler = getattr(diffusers, scheduler_class_name)
-         pipe.scheduler = scheduler.from_config(pipe.scheduler.config)
-
-     # Image preprocessing
-     face_image = load_image(face_image_path)
-     face_image = resize_img(face_image, max_side=1024)
-     face_image_cv2 = cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR)
-     face_info = app.get(face_image_cv2)
-
      if not face_info:
-         raise ValueError("No face detected in the input image.")
-
-     face_emb = face_info[0]["embedding"]
-     face_kps = draw_kps(face_image, face_info[0]["kps"])
-     control_images = [face_kps]
-     control_scales = [identitynet_strength_ratio]
-
-     # MultiControlNet
-     if controlnet_selection:
-         selected_models = [controlnet_map[s] for s in controlnet_selection]
-         selected_conditions = [controlnet_map_fn[s](face_image) for s in controlnet_selection]
-         control_images += selected_conditions
-         control_scales += [canny_strength, depth_strength]

-         pipe.controlnet = MultiControlNetModel([controlnet_identitynet] + selected_models)
-
-     generator = torch.Generator(device=device).manual_seed(seed)
      output = pipe(
-         prompt=prompt, negative_prompt=negative_prompt, image_embeds=face_emb,
-         image=control_images, controlnet_conditioning_scale=control_scales,
-         num_inference_steps=num_steps, guidance_scale=guidance_scale, generator=generator,
      )
      return output.images[0]

+ import os
  import torch
+ import cv2
  import numpy as np
+ from PIL import Image
  from insightface.app import FaceAnalysis
+ from diffusers import ControlNetModel, EulerDiscreteScheduler
+ from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
+ from diffusers.utils import load_image
+ from huggingface_hub import hf_hub_download
+ from torchvision.transforms import Compose
+ from pipeline_stable_diffusion_xl_instantid_full import StableDiffusionXLInstantIDPipeline, draw_kps
  from depth_anything.dpt import DepthAnything
  from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet

+ # Suppress ONNX Runtime CPU thread affinity warnings
+ os.environ["ORT_DISABLE_CPU_AFFINITY"] = "1"
+
+ # Ensure CUDA provider is available for ONNX
+ import onnxruntime as ort
+ print("Available ONNX Runtime Providers:", ort.get_available_providers())

  # Global variables
  device = "cuda" if torch.cuda.is_available() else "cpu"
+ dtype = torch.float16 if device == "cuda" else torch.float32
+
+ # Configure FaceAnalysis with GPU support
+ app = FaceAnalysis(
+     name="antelopev2",
+     root="./",
+     providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+ )
  app.prepare(ctx_id=0, det_size=(640, 640))

+ # Initialize DepthAnything for depth map generation
+ depth_anything = DepthAnything.from_pretrained("LiheYoung/depth_anything_vitl14").to(device).eval()
+
  transform = Compose([
+     Resize(width=518, height=518, resize_target=False, keep_aspect_ratio=True, ensure_multiple_of=14),
      NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
      PrepareForNet(),
  ])
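Aside: the Resize/NormalizeImage/PrepareForNet chain above follows the Depth Anything preprocessing convention, where each transform consumes and returns a dict keyed by 'image' holding a float HxWx3 array in [0, 1]. A minimal sketch of that contract (the dummy input array and printed shape are illustrative assumptions, not from the app):

    # Each Depth Anything transform maps a sample dict to a sample dict.
    sample = {'image': np.random.rand(480, 640, 3).astype(np.float32)}  # dummy RGB in [0, 1]
    out = transform(sample)['image']  # CHW float32, sides resized to multiples of 14
    print(out.shape)                  # e.g. (3, 518, 686) for a 480x640 input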

+ # Download InstantID checkpoints so the local ./checkpoints paths below resolve
+ hf_hub_download(repo_id="InstantX/InstantID", filename="ControlNetModel/config.json", local_dir="./checkpoints")
+ hf_hub_download(repo_id="InstantX/InstantID", filename="ControlNetModel/diffusion_pytorch_model.safetensors", local_dir="./checkpoints")
+ hf_hub_download(repo_id="InstantX/InstantID", filename="ip-adapter.bin", local_dir="./checkpoints")
+
+ # Load ControlNet models
+ controlnet_identitynet = ControlNetModel.from_pretrained("./checkpoints/ControlNetModel", torch_dtype=dtype)
+ controlnet_canny = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0", torch_dtype=dtype).to(device)
+ controlnet_depth = ControlNetModel.from_pretrained("diffusers/controlnet-depth-sdxl-1.0-small", torch_dtype=dtype).to(device)
+
+ # Load main pipeline
+ pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
+     "wangqixun/YamerMIX_v8",
+     controlnet=[controlnet_identitynet],
+     torch_dtype=dtype,
+     safety_checker=None,
+     feature_extractor=None,
+ ).to(device)
+
+ pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config)
+ pipe.load_ip_adapter_instantid("./checkpoints/ip-adapter.bin")
+ pipe.cuda()
+
+ # Utility functions
  def get_depth_map(image):
      image = np.array(image) / 255.0
      h, w = image.shape[:2]
      image = transform({'image': image})['image']
      image = torch.from_numpy(image).unsqueeze(0).to(device)
      with torch.no_grad():
          depth = depth_anything(image)
+     depth = torch.nn.functional.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
+     depth = ((depth - depth.min()) / (depth.max() - depth.min()) * 255.0).cpu().numpy().astype(np.uint8)
+     return Image.fromarray(depth)

  def get_canny_image(image, t1=100, t2=200):
      image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
      edges = cv2.Canny(image, t1, t2)
      return Image.fromarray(edges, "L")

+ # Map for controlnet preprocessing
  controlnet_map = {
      "canny": controlnet_canny,
      "depth": controlnet_depth,
  }
+
  controlnet_map_fn = {
      "canny": get_canny_image,
      "depth": get_depth_map,
  }
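To make the dispatch above concrete: a selection list indexes both dicts, so condition images and ControlNet models stay index-aligned. A small sketch (the input reuses the example file from the __main__ block below; the names face_img, selection, conditions, and models are illustrative):

    face_img = Image.open("./examples/yann-lecun_resize.jpg")         # any RGB PIL image
    selection = ["depth"]                                             # subset of {"canny", "depth"}
    conditions = [controlnet_map_fn[s](face_img) for s in selection]  # [PIL depth map]
    models = [controlnet_map[s] for s in selection]                   # [controlnet_depth]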

+ # Generate image function
+ def generate_image(face_image_path, controlnet_selection, prompt, negative_prompt, num_steps, guidance_scale, seed):
+     face_image = load_image(face_image_path).resize((1024, 1024))
+     face_image_cv2 = cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR)  # insightface expects BGR
+     face_info = app.get(face_image_cv2)
      if not face_info:
+         raise ValueError("No face detected in the image!")
+
+     # InstantID conditioning: face embedding for the IP-adapter, keypoints for the identity ControlNet
+     face_emb = face_info[0]["embedding"]
+     face_kps = draw_kps(face_image, face_info[0]["kps"])
+
+     control_images = [face_kps]
+     for control_type in controlnet_selection:
+         if control_type in controlnet_map_fn:
+             control_images.append(controlnet_map_fn[control_type](face_image))
+
+     pipe.controlnet = MultiControlNetModel([controlnet_identitynet] + [controlnet_map[control_type] for control_type in controlnet_selection])
+     generator = torch.Generator(device=device).manual_seed(seed)
      output = pipe(
+         prompt=prompt,
+         negative_prompt=negative_prompt,
+         image_embeds=face_emb,
+         image=control_images,
+         controlnet_conditioning_scale=1.0,
+         guidance_scale=guidance_scale,
+         num_inference_steps=num_steps,
+         generator=generator,
      )
      return output.images[0]
+
+ # Example usage
+ if __name__ == "__main__":
+     face_image_path = "./examples/yann-lecun_resize.jpg"
+     controlnet_selection = ["canny", "depth"]
+     prompt = "A person in vibrant colors"
+     negative_prompt = "(low quality, blurry)"
+     num_steps = 30
+     guidance_scale = 7.5
+     seed = 42
+
+     output_image = generate_image(
+         face_image_path, controlnet_selection, prompt, negative_prompt, num_steps, guidance_scale, seed
+     )
+     output_image.show()
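For headless runs (e.g., inside a Space container where .show() has no display), a hedged variant of the example above; the depth-only selection and the output filename are illustrative assumptions:

    # Depth-only conditioning; skips the canny preprocessor and ControlNet.
    img = generate_image(
        "./examples/yann-lecun_resize.jpg",
        ["depth"],
        "A person in vibrant colors",
        "(low quality, blurry)",
        30, 7.5, 42,
    )
    img.save("result_depth_only.png")  # write to disk instead of opening a viewer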