Spaces: Running on Zero

youssefKadaouiAbbassi committed: Update app.py

app.py CHANGED
@@ -1,56 +1,61 @@
-import
+import os
 import torch
-import
+import cv2
 import numpy as np
-
-import spaces
-
-import PIL
-from PIL import Image
-from typing import Tuple
-
-import diffusers
-from diffusers.utils import load_image
-from diffusers.models import ControlNetModel
-from diffusers.pipelines.controlnet.multicontrolnet import MultiControlNetModel
-
-from huggingface_hub import hf_hub_download
-
 from insightface.app import FaceAnalysis
-
-from
-from
-
+from diffusers import ControlNetModel, StableDiffusionXLInstantIDPipeline
+from diffusers.utils import load_image
+from torchvision.transforms import Compose
 from depth_anything.dpt import DepthAnything
 from depth_anything.util.transform import Resize, NormalizeImage, PrepareForNet
 
-
-
+# Suppress ONNX Runtime CPU thread affinity warnings
+os.environ["ORT_DISABLE_CPU_AFFINITY"] = "1"
+
+# Ensure CUDA provider is available for ONNX
+import onnxruntime as ort
+print("Available ONNX Runtime Providers:", ort.get_available_providers())
 
 # Global variables
-MAX_SEED = np.iinfo(np.int32).max
 device = "cuda" if torch.cuda.is_available() else "cpu"
-dtype = torch.float16 if
-
-
-
-
-
-
-
-
-# Load face encoder
-app = FaceAnalysis(name="antelopev2", root="./", providers=["CPUExecutionProvider"])
+dtype = torch.float16 if device == "cuda" else torch.float32
+
+# Configure FaceAnalysis with GPU support
+app = FaceAnalysis(
+    name="antelopev2",
+    root="./",
+    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
+)
 app.prepare(ctx_id=0, det_size=(640, 640))
 
-#
-depth_anything = DepthAnything.from_pretrained(
+# Initialize DepthAnything for depth map generation
+depth_anything = DepthAnything.from_pretrained("LiheYoung/depth_anything_vitl14").to(device).eval()
+
 transform = Compose([
-    Resize(width=518, height=518, resize_target=False, keep_aspect_ratio=True, ensure_multiple_of=14
+    Resize(width=518, height=518, resize_target=False, keep_aspect_ratio=True, ensure_multiple_of=14),
     NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
     PrepareForNet(),
 ])
 
+# Load ControlNet models
+controlnet_identitynet = ControlNetModel.from_pretrained("./checkpoints/ControlNetModel", torch_dtype=dtype)
+controlnet_canny = ControlNetModel.from_pretrained("diffusers/controlnet-canny-sdxl-1.0", torch_dtype=dtype).to(device)
+controlnet_depth = ControlNetModel.from_pretrained("diffusers/controlnet-depth-sdxl-1.0-small", torch_dtype=dtype).to(device)
+
+# Load main pipeline
+pipe = StableDiffusionXLInstantIDPipeline.from_pretrained(
+    "wangqixun/YamerMIX_v8",
+    controlnet=[controlnet_identitynet],
+    torch_dtype=dtype,
+    safety_checker=None,
+    feature_extractor=None,
+).to(device)
+
+pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
+pipe.load_ip_adapter_instantid("./checkpoints/ip-adapter.bin")
+pipe.cuda()
+
+# Utility functions
 def get_depth_map(image):
     image = np.array(image) / 255.0
     h, w = image.shape[:2]
@@ -58,91 +63,63 @@ def get_depth_map(image):
     image = torch.from_numpy(image).unsqueeze(0).to(device)
     with torch.no_grad():
         depth = depth_anything(image)
-    depth =
-    depth = (depth - depth.min()) / (depth.max() - depth.min()) * 255.0
-    return Image.fromarray(depth
+    depth = torch.nn.functional.interpolate(depth[None], (h, w), mode='bilinear', align_corners=False)[0, 0]
+    depth = ((depth - depth.min()) / (depth.max() - depth.min()) * 255.0).cpu().numpy().astype(np.uint8)
+    return Image.fromarray(depth)
 
 def get_canny_image(image, t1=100, t2=200):
     image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
     edges = cv2.Canny(image, t1, t2)
     return Image.fromarray(edges, "L")
 
-#
-controlnet_path = "./checkpoints/ControlNetModel"
-controlnet_identitynet = ControlNetModel.from_pretrained(controlnet_path, torch_dtype=dtype)
-
-controlnet_canny_model = "diffusers/controlnet-canny-sdxl-1.0"
-controlnet_depth_model = "diffusers/controlnet-depth-sdxl-1.0-small"
-controlnet_canny = ControlNetModel.from_pretrained(controlnet_canny_model, torch_dtype=dtype).to(device)
-controlnet_depth = ControlNetModel.from_pretrained(controlnet_depth_model, torch_dtype=dtype).to(device)
-
+# Map for controlnet preprocessing
 controlnet_map = {
     "canny": controlnet_canny,
     "depth": controlnet_depth,
 }
+
 controlnet_map_fn = {
     "canny": get_canny_image,
     "depth": get_depth_map,
 }
 
-#
-
-
-
-    controlnet=[controlnet_identitynet],
-    torch_dtype=dtype,
-    safety_checker=None,
-    feature_extractor=None,
-).to(device)
-
-pipe.scheduler = diffusers.EulerDiscreteScheduler.from_config(pipe.scheduler.config)
-pipe.load_lora_weights("latent-consistency/lcm-lora-sdxl")
-pipe.disable_lora()
-pipe.cuda()
-pipe.load_ip_adapter_instantid("./checkpoints/ip-adapter.bin")
-
-@spaces.GPU
-def generate_image(
-    face_image_path, pose_image_path, prompt, negative_prompt, style_name, num_steps,
-    identitynet_strength_ratio, adapter_strength_ratio, canny_strength, depth_strength,
-    controlnet_selection, guidance_scale, seed, scheduler, enable_LCM, enhance_face_region,
-):
-    if enable_LCM:
-        pipe.scheduler = diffusers.LCMScheduler.from_config(pipe.scheduler.config)
-        pipe.enable_lora()
-    else:
-        pipe.disable_lora()
-        scheduler_class_name = scheduler.split("-")[0]
-        scheduler = getattr(diffusers, scheduler_class_name)
-        pipe.scheduler = scheduler.from_config(pipe.scheduler.config)
-
-    # Image preprocessing
-    face_image = load_image(face_image_path)
-    face_image = resize_img(face_image, max_side=1024)
-    face_image_cv2 = cv2.cvtColor(np.array(face_image), cv2.COLOR_RGB2BGR)
-    face_info = app.get(face_image_cv2)
-
+# Generate image function
+def generate_image(face_image_path, controlnet_selection, prompt, negative_prompt, num_steps, guidance_scale, seed):
+    face_image = load_image(face_image_path).resize((1024, 1024))
+    face_info = app.get(np.array(face_image))
     if not face_info:
-        raise ValueError("No face detected in the
-
-
-
-
-
-
-
-
-        selected_models = [controlnet_map[s] for s in controlnet_selection]
-        selected_conditions = [controlnet_map_fn[s](face_image) for s in controlnet_selection]
-        control_images += selected_conditions
-        control_scales += [canny_strength, depth_strength]
+        raise ValueError("No face detected in the image!")
+
+    control_images = []
+    for control_type in controlnet_selection:
+        if control_type in controlnet_map_fn:
+            control_images.append(controlnet_map_fn[control_type](face_image))
+
+    pipe.controlnet = [controlnet_identitynet] + [controlnet_map[control_type] for control_type in controlnet_selection]
+    generator = torch.manual_seed(seed)
 
-    pipe.controlnet = MultiControlNetModel([controlnet_identitynet] + selected_models)
-
-    generator = torch.Generator(device=device).manual_seed(seed)
     output = pipe(
-        prompt=prompt,
-
-
+        prompt=prompt,
+        negative_prompt=negative_prompt,
+        image=control_images,
+        controlnet_conditioning_scale=1.0,
+        guidance_scale=guidance_scale,
+        num_inference_steps=num_steps,
+        generator=generator,
    )
     return output.images[0]
+
+# Example usage
+if __name__ == "__main__":
+    face_image_path = "./examples/yann-lecun_resize.jpg"
+    controlnet_selection = ["canny", "depth"]
+    prompt = "A person in vibrant colors"
+    negative_prompt = "(low quality, blurry)"
+    num_steps = 30
+    guidance_scale = 7.5
+    seed = 42
+
+    output_image = generate_image(
+        face_image_path, controlnet_selection, prompt, negative_prompt, num_steps, guidance_scale, seed
+    )
+    output_image.show()
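Note: the updated file still calls Image.fromarray (in get_depth_map and get_canny_image) and diffusers.EulerDiscreteScheduler, but neither PIL.Image nor the top-level diffusers module appears in the new import block shown above. Assuming they are not imported in an unchanged part of the file outside these hunks, the new version would also need something like:

    # Hypothetical follow-up: imports the code above appears to rely on
    from PIL import Image   # Image.fromarray() in get_depth_map / get_canny_image
    import diffusers        # diffusers.EulerDiscreteScheduler.from_config(...)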
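The commit also switches FaceAnalysis from CPUExecutionProvider to requesting CUDAExecutionProvider and prints the globally available ONNX Runtime providers. A minimal sketch for confirming which provider each antelopev2 sub-model actually received after app.prepare(), assuming insightface exposes the underlying onnxruntime InferenceSession as model.session (an assumption, not something shown in this diff):

    # Assumption: each insightface sub-model wraps an onnxruntime.InferenceSession as `.session`
    for task_name, model in app.models.items():
        print(task_name, model.session.get_providers())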