import sys
import os
import torch
from pathlib import Path
from huggingface_hub import hf_hub_download
from PIL import Image, ImageSequence, ImageOps
from typing import Any, List
import numpy as np

# Make the bundled ComfyUI checkout importable before pulling in its nodes.
sys.path.append(os.path.dirname("./ComfyUI/"))

from ComfyUI.nodes import (
    CheckpointLoaderSimple,
    VAEDecode,
    VAEEncode,
    KSampler,
    EmptyLatentImage,
    CLIPTextEncode,
)
from ComfyUI.comfy_extras.nodes_compositing import JoinImageWithAlpha
from ComfyUI.comfy_extras.nodes_mask import InvertMask, MaskToImage
from ComfyUI.comfy import samplers
from ComfyUI.custom_nodes.layerdiffuse.layered_diffusion import (
    LayeredDiffusionFG,
    LayeredDiffusionDecode,
    LayeredDiffusionCond,
)
import gradio as gr

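# Fetch the JuggernautXL v8 checkpoint from the Hub and symlink it into
# ComfyUI's checkpoints folder so CheckpointLoaderSimple can find it by name.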
MODEL_PATH = hf_hub_download(
    repo_id="lllyasviel/fav_models",
    subfolder="fav",
    filename="juggernautXL_v8Rundiffusion.safetensors",
)
try:
    os.symlink(
        MODEL_PATH,
        Path("./ComfyUI/models/checkpoints/juggernautXL_v8Rundiffusion.safetensors"),
    )
except FileExistsError:
    pass

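# Instantiate the ComfyUI node callables once at import time. CheckpointLoaderSimple
# returns a (model, clip, vae) tuple, indexed below as ckpt[0], ckpt[1], ckpt[2].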
with torch.inference_mode():
    ckpt_load_checkpoint = CheckpointLoaderSimple().load_checkpoint
    ckpt = ckpt_load_checkpoint(ckpt_name="juggernautXL_v8Rundiffusion.safetensors")

    cliptextencode = CLIPTextEncode().encode
    emptylatentimage_generate = EmptyLatentImage().generate
    ksampler_sample = KSampler().sample
    vae_decode = VAEDecode().decode
    vae_encode = VAEEncode().encode
    ld_fg_apply_layered_diffusion = LayeredDiffusionFG().apply_layered_diffusion
    ld_cond_apply_layered_diffusion = LayeredDiffusionCond().apply_layered_diffusion

    ld_decode = LayeredDiffusionDecode().decode
    mask_to_image = MaskToImage().mask_to_image
    invert_mask = InvertMask().invert
    join_image_with_alpha = JoinImageWithAlpha().join_image_with_alpha

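# Convert ComfyUI image tensors (values in [0, 1]) to PIL images.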
def tensor_to_pil(images: torch.Tensor | List[torch.Tensor]) -> List[Image.Image]:
    if not isinstance(images, list):
        images = [images]
    imgs = []
    for image in images:
        i = 255.0 * image.cpu().numpy()
        img = Image.fromarray(np.clip(np.squeeze(i), 0, 255).astype(np.uint8))
        imgs.append(img)
    return imgs

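# Edge-pad the input so both sides are multiples of 64 (at least 128), then
# letterbox it onto a square black canvas so it resizes cleanly to 1024x1024.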
def pad_image(input_image):
    pad_w, pad_h = (
        np.max(((2, 2), np.ceil(np.array(input_image.size) / 64).astype(int)), axis=0)
        * 64
        - input_image.size
    )
    im_padded = Image.fromarray(
        np.pad(np.array(input_image), ((0, pad_h), (0, pad_w), (0, 0)), mode="edge")
    )
    w, h = im_padded.size
    if w == h:
        return im_padded
    elif w > h:
        new_image = Image.new(im_padded.mode, (w, w), (0, 0, 0))
        new_image.paste(im_padded, (0, (w - h) // 2))
        return new_image
    else:
        new_image = Image.new(im_padded.mode, (h, h), (0, 0, 0))
        new_image.paste(im_padded, ((h - w) // 2, 0))
        return new_image

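# Mirror ComfyUI's LoadImage behaviour: return a float image tensor in [0, 1]
# plus a mask built from the inverted alpha channel (or a zero mask if no alpha).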
def pil_to_tensor(image: Image.Image) -> tuple[torch.Tensor, torch.Tensor]:
    output_images = []
    output_masks = []
    for i in ImageSequence.Iterator(image):
        i = ImageOps.exif_transpose(i)
        if i.mode == "I":
            i = i.point(lambda i: i * (1 / 255))
        image = i.convert("RGB")
        image = np.array(image).astype(np.float32) / 255.0
        image = torch.from_numpy(image)[None,]
        if "A" in i.getbands():
            mask = np.array(i.getchannel("A")).astype(np.float32) / 255.0
            mask = 1.0 - torch.from_numpy(mask)
        else:
            mask = torch.zeros((64, 64), dtype=torch.float32, device="cpu")
        output_images.append(image)
        output_masks.append(mask.unsqueeze(0))

    if len(output_images) > 1:
        output_image = torch.cat(output_images, dim=0)
        output_mask = torch.cat(output_masks, dim=0)
    else:
        output_image = output_images[0]
        output_mask = output_masks[0]

    return (output_image, output_mask)

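# Run one generation. With an input image, the image is VAE-encoded and attached as
# layered-diffusion conditioning (foreground or background, per cond_mode); without
# one, the transparent-foreground patch ("SDXL, Conv Injection") is applied instead.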
def predict(
    prompt: str,
    negative_prompt: str,
    input_image: Image.Image | None,
    cond_mode: str,
    seed: int,
    sampler_name: str,
    scheduler: str,
    steps: int,
    cfg: float,
    denoise: float,
):
    with torch.inference_mode():
        cliptextencode_prompt = cliptextencode(
            text=prompt,
            clip=ckpt[1],
        )
        cliptextencode_negative_prompt = cliptextencode(
            text=negative_prompt,
            clip=ckpt[1],
        )
        emptylatentimage_sample = emptylatentimage_generate(
            width=1024, height=1024, batch_size=1
        )

        if input_image is not None:
            # Encode the padded, resized input image and hand it to the
            # conditional layered-diffusion patch.
            img_tensor = pil_to_tensor(pad_image(input_image).resize((1024, 1024)))
            img_latent = vae_encode(pixels=img_tensor[0], vae=ckpt[2])
            layereddiffusionapply_sample = ld_cond_apply_layered_diffusion(
                config=cond_mode,
                weight=1,
                model=ckpt[0],
                cond=cliptextencode_prompt[0],
                uncond=cliptextencode_negative_prompt[0],
                latent=img_latent[0],
            )
            ksampler = ksampler_sample(
                steps=steps,
                cfg=cfg,
                sampler_name=sampler_name,
                scheduler=scheduler,
                seed=seed,
                model=layereddiffusionapply_sample[0],
                positive=layereddiffusionapply_sample[1],
                negative=layereddiffusionapply_sample[2],
                latent_image=emptylatentimage_sample[0],
                denoise=denoise,
            )

            vaedecode_sample = vae_decode(
                samples=ksampler[0],
                vae=ckpt[2],
            )
            layereddiffusiondecode_sample = ld_decode(
                sd_version="SDXL",
                sub_batch_size=16,
                samples=ksampler[0],
                images=vaedecode_sample[0],
            )

            # Only the blended RGB result is returned in this branch.
            rgb_img = tensor_to_pil(vaedecode_sample[0])
            return flatten([rgb_img])

        else:
            layereddiffusionapply_sample = ld_fg_apply_layered_diffusion(
                config="SDXL, Conv Injection", weight=1, model=ckpt[0]
            )
            ksampler = ksampler_sample(
                steps=steps,
                cfg=cfg,
                sampler_name=sampler_name,
                scheduler=scheduler,
                seed=seed,
                model=layereddiffusionapply_sample[0],
                positive=cliptextencode_prompt[0],
                negative=cliptextencode_negative_prompt[0],
                latent_image=emptylatentimage_sample[0],
                denoise=denoise,
            )

            vaedecode_sample = vae_decode(
                samples=ksampler[0],
                vae=ckpt[2],
            )
            layereddiffusiondecode_sample = ld_decode(
                sd_version="SDXL",
                sub_batch_size=16,
                samples=ksampler[0],
                images=vaedecode_sample[0],
            )
            # The layered decode yields the foreground image plus its alpha mask;
            # compose them into an RGBA preview alongside the raw outputs.
            mask = mask_to_image(mask=layereddiffusiondecode_sample[1])
            ld_image = tensor_to_pil(layereddiffusiondecode_sample[0][0])
            inverted_mask = invert_mask(mask=layereddiffusiondecode_sample[1])
            rgba_img = join_image_with_alpha(
                image=layereddiffusiondecode_sample[0], alpha=inverted_mask[0]
            )
            rgba_img = tensor_to_pil(rgba_img[0])
            mask = tensor_to_pil(mask[0])
            rgb_img = tensor_to_pil(vaedecode_sample[0])

            return flatten([rgba_img, mask, rgb_img, ld_image])

examples = [["An old man sits on a chair looking at the sky"]]

def flatten(l: List[List[Any]]) -> List[Any]:
    return [item for sublist in l for item in sublist]


def predict_examples(prompt, negative_prompt):
    return predict(
        prompt, negative_prompt, None, None, 0, "euler", "normal", 20, 8.0, 1.0
    )

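# Gradio UI: prompt inputs, an optional conditioning image, sampler settings, and a
# gallery showing the RGBA result, alpha mask, blended RGB, and layered decode.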
css = """
.gradio-container{
    max-width: 60rem;
}
"""

with gr.Blocks(css=css) as blocks:
    gr.Markdown("# LayerDiffuse (unofficial)")

    with gr.Row():
        with gr.Column():
            prompt = gr.Text(label="Prompt")
            negative_prompt = gr.Text(label="Negative Prompt")
            button = gr.Button("Generate")
            with gr.Accordion(open=False, label="Input Image (Optional)"):
                cond_mode = gr.Radio(
                    value="SDXL, Foreground",
                    choices=["SDXL, Foreground", "SDXL, Background"],
                    info="Whether to use the input image as foreground or background",
                )
                input_image = gr.Image(label="Input Image", type="pil")
            with gr.Accordion(open=False, label="Advanced Options"):
                seed = gr.Slider(
                    label="Seed",
                    value=0,
                    minimum=-1,
                    maximum=0xFFFFFFFFFFFFFFFF,
                    step=1,
                    randomize=True,
                )
                sampler_name = gr.Dropdown(
                    choices=samplers.KSampler.SAMPLERS,
                    label="Sampler Name",
                    value=samplers.KSampler.SAMPLERS[0],
                )
                scheduler = gr.Dropdown(
                    choices=samplers.KSampler.SCHEDULERS,
                    label="Scheduler",
                    value=samplers.KSampler.SCHEDULERS[0],
                )
                steps = gr.Number(
                    label="Steps", value=20, minimum=1, maximum=10000, step=1
                )
                cfg = gr.Number(
                    label="CFG", value=8.0, minimum=0.0, maximum=100.0, step=0.1
                )
                denoise = gr.Number(
                    label="Denoise", value=1.0, minimum=0.0, maximum=1.0, step=0.01
                )

        with gr.Column(scale=2):
            gallery = gr.Gallery(
                columns=[2], rows=[2], object_fit="contain", height="unset"
            )

    inputs = [
        prompt,
        negative_prompt,
        input_image,
        cond_mode,
        seed,
        sampler_name,
        scheduler,
        steps,
        cfg,
        denoise,
    ]
    outputs = [gallery]

    gr.Examples(
        fn=predict_examples,
        examples=examples,
        inputs=[prompt, negative_prompt],
        outputs=outputs,
        cache_examples=False,
    )

    button.click(fn=predict, inputs=inputs, outputs=outputs)


if __name__ == "__main__":
    blocks.launch()