radames's picture
Update app.py
03bb4fa verified
raw
history blame
10.7 kB
import sys
import os
import torch
from PIL import Image
from typing import Any, List
import numpy as np
from utils import (
tensor_to_pil,
pil_to_tensor,
pad_image,
postprocess_image,
preprocess_image,
downloadModels,
examples,
)
sys.path.append(os.path.dirname("./ComfyUI/"))
from ComfyUI.nodes import (
CheckpointLoaderSimple,
VAEDecode,
VAEEncode,
KSampler,
EmptyLatentImage,
CLIPTextEncode,
)
from ComfyUI.comfy_extras.nodes_compositing import JoinImageWithAlpha
from ComfyUI.comfy_extras.nodes_mask import InvertMask, MaskToImage
from ComfyUI.comfy import samplers
from ComfyUI.custom_nodes.layerdiffuse.layered_diffusion import (
LayeredDiffusionFG,
LayeredDiffusionDecode,
LayeredDiffusionCond,
)
import gradio as gr
from briarmbg import BriaRMBG
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Helper imported from utils; invoked once at import time, before the checkpoint is loaded.
downloadModels()

# NOTE(review): the indentation of this section was lost in the reviewed copy; every
# statement down to `rmbg_model` is assumed to belong to the inference_mode block —
# confirm against the original file.
with torch.inference_mode():
    ckpt_load_checkpoint = CheckpointLoaderSimple().load_checkpoint
    # `ckpt` is indexed elsewhere in this file as ckpt[0] (passed as `model=`),
    # ckpt[1] (passed as `clip=`) and ckpt[2] (passed as `vae=`).
    ckpt = ckpt_load_checkpoint(
        ckpt_name="juggernautXL_version6Rundiffusion.safetensors"
    )

    # Instantiate each ComfyUI node once and keep its bound entry-point method,
    # so predict() can call them as plain functions.
    cliptextencode = CLIPTextEncode().encode
    emptylatentimage_generate = EmptyLatentImage().generate
    ksampler_sample = KSampler().sample
    vae_decode = VAEDecode().decode
    vae_encode = VAEEncode().encode
    ld_fg_apply_layered_diffusion = LayeredDiffusionFG().apply_layered_diffusion
    ld_cond_apply_layered_diffusion = LayeredDiffusionCond().apply_layered_diffusion
    ld_decode = LayeredDiffusionDecode().decode
    mask_to_image = MaskToImage().mask_to_image
    invert_mask = InvertMask().invert
    join_image_with_alpha = JoinImageWithAlpha().join_image_with_alpha
    # Background-removal model used by predict() when `remove_bg` is set.
    rmbg_model = BriaRMBG.from_pretrained("briaai/RMBG-1.4").to(device)
def _remove_background(input_image: Image.Image) -> Image.Image:
    """Return an RGBA copy of *input_image* whose background is transparent.

    The module-level ``rmbg_model`` predicts a mask for the image; the original
    pixels are then pasted onto a fully transparent canvas through that mask.
    """
    orig_im_size = input_image.size
    image = preprocess_image(np.array(input_image), [1024, 1024]).to(device)
    result = rmbg_model(image)
    # Turn the first model output into a mask array at the original image size.
    result_mask_image = postprocess_image(result[0][0], orig_im_size)
    pil_mask = Image.fromarray(result_mask_image)
    no_bg_image = Image.new("RGBA", pil_mask.size, (0, 0, 0, 0))
    no_bg_image.paste(input_image, mask=pil_mask)
    return no_bg_image


def predict(
    prompt: str,
    negative_prompt: str,
    input_image: Image.Image,
    remove_bg: bool,
    cond_mode: str,
    seed: int,
    sampler_name: str,
    scheduler: str,
    steps: int,
    cfg: float,
    denoise: float,
):
    """Generate a 1024x1024 image with the layered-diffusion pipeline.

    Two paths exist:

    * ``input_image`` is ``None``: the foreground model patch
      (``"SDXL, Conv Injection"``) is applied and the result is returned as an
      image joined with its alpha channel, together with the mask image.
    * ``input_image`` is given: the image is padded, resized to 1024x1024,
      optionally stripped of its background, VAE-encoded and used as
      conditioning with the ``cond_mode`` config. The plain decoded image is
      returned in both image slots.

    Args:
        prompt: Positive text prompt.
        negative_prompt: Negative text prompt.
        input_image: Optional conditioning image; ``None`` selects the
            foreground-only path.
        remove_bg: When true (and an input image is given), remove the image
            background before encoding it.
        cond_mode: Config name passed to the conditioned layered-diffusion
            node; only used when ``input_image`` is given.
        seed: Sampling seed; ``-1`` draws a random seed.
        sampler_name: Sampler name forwarded to the KSampler node.
        scheduler: Scheduler name forwarded to the KSampler node.
        steps: Number of sampling steps.
        cfg: Classifier-free guidance scale forwarded to the KSampler node.
        denoise: Denoise strength forwarded to the KSampler node.

    Returns:
        A ``(image, mask_or_image, seed)`` tuple, where ``seed`` is the plain
        Python ``int`` that was actually used.

    Raises:
        gr.Error: If any step fails; the original exception is chained as the
            cause.
    """
    try:
        if seed == -1:
            # An explicit 64-bit dtype keeps this working on platforms whose
            # default NumPy integer is 32-bit.
            seed = np.random.randint(0, 2**63 - 1, dtype=np.int64)
        # The UI may deliver a float and NumPy returns a NumPy scalar;
        # normalise to a plain int for the sampler and for the returned value.
        seed = int(seed)

        with torch.inference_mode():
            prompt_cond = cliptextencode(text=prompt, clip=ckpt[1])
            negative_cond = cliptextencode(text=negative_prompt, clip=ckpt[1])
            empty_latent = emptylatentimage_generate(
                width=1024, height=1024, batch_size=1
            )

            conditioned = input_image is not None
            if conditioned:
                input_image = pad_image(input_image).resize((1024, 1024))
                if remove_bg:
                    input_image = _remove_background(input_image)
                img_tensor = pil_to_tensor(input_image)
                img_latent = vae_encode(pixels=img_tensor[0], vae=ckpt[2])
                applied = ld_cond_apply_layered_diffusion(
                    config=cond_mode,
                    weight=1,
                    model=ckpt[0],
                    cond=prompt_cond[0],
                    uncond=negative_cond[0],
                    latent=img_latent[0],
                )
                # The conditioned node returns the patched model together with
                # the conditionings to sample with.
                model, positive, negative = applied[0], applied[1], applied[2]
            else:
                applied = ld_fg_apply_layered_diffusion(
                    config="SDXL, Conv Injection", weight=1, model=ckpt[0]
                )
                model = applied[0]
                positive, negative = prompt_cond[0], negative_cond[0]

            sampled = ksampler_sample(
                steps=steps,
                cfg=cfg,
                sampler_name=sampler_name,
                scheduler=scheduler,
                seed=seed,
                model=model,
                positive=positive,
                negative=negative,
                latent_image=empty_latent[0],
                denoise=denoise,
            )
            decoded = vae_decode(samples=sampled[0], vae=ckpt[2])
            # NOTE(review): the conditioned path never reads `layered`. The call
            # is kept there to preserve the original behaviour; consider
            # skipping it if the decode node has no required side effects.
            layered = ld_decode(
                sd_version="SDXL",
                sub_batch_size=16,
                samples=sampled[0],
                images=decoded[0],
            )

            if conditioned:
                rgb_img = tensor_to_pil(decoded[0])
                return (rgb_img[0], rgb_img[0], seed)

            mask = mask_to_image(mask=layered[1])
            inverted_mask = invert_mask(mask=layered[1])
            rgba_img = join_image_with_alpha(
                image=layered[0], alpha=inverted_mask[0]
            )
            rgba_img = tensor_to_pil(rgba_img[0])
            mask = tensor_to_pil(mask[0])
            return (rgba_img[0], mask[0], seed)
    except gr.Error:
        # Already a user-facing error; do not wrap it a second time.
        raise
    except Exception as e:
        # gr.Error expects a message string; keep the original as the cause.
        raise gr.Error(f"{type(e).__name__}: {e}") from e
def flatten(l: List[List[Any]]) -> List[Any]:
    """Concatenate the items of every sub-list of *l* into one flat list.

    Exactly one level of nesting is removed; items that are themselves lists
    are kept as they are. An empty input yields an empty list.
    """
    return [item for sublist in l for item in sublist]
def predict_examples(
    prompt,
    negative_prompt,
    input_image=None,
    remove_bg=False,
    cond_mode=None,
    seed=-1,
    cfg=10,
):
    """Run ``predict`` for an example row with fixed sampler settings.

    Only the values an example row provides are taken as arguments; sampler,
    scheduler, step count and denoise strength are pinned here. Returns
    whatever ``predict`` returns.
    """
    # Settings that example rows do not supply.
    pinned_settings = {
        "sampler_name": "dpmpp_2m_sde_gpu",
        "scheduler": "karras",
        "steps": 30,
        "denoise": 1.0,
    }
    return predict(
        prompt=prompt,
        negative_prompt=negative_prompt,
        input_image=input_image,
        remove_bg=remove_bg,
        cond_mode=cond_mode,
        seed=seed,
        cfg=cfg,
        **pinned_settings,
    )
# Page-level CSS override that caps the width of the Gradio container.
css = """
.gradio-container { max-width: 68rem !important; }
"""

# NOTE(review): indentation was lost in the reviewed copy; the nesting below is
# reconstructed from the order of the `with` statements — confirm against the
# original layout.
with gr.Blocks(css=css) as blocks:
    # Header with links to the custom node, the models and the paper.
    gr.Markdown("""# LayerDiffuse (unofficial)
Using ComfyUI building blocks with custom node by [huchenlei](https://github.com/huchenlei/ComfyUI-layerdiffuse)
Models: [LayerDiffusion/layerdiffusion-v1](https://huggingface.co/LayerDiffusion/layerdiffusion-v1/tree/main)
Paper: [Transparent Image Layer Diffusion using Latent Transparency](https://huggingface.co/papers/2402.17113)
""")
    with gr.Row():
        # Input column: prompts, optional conditioning image, sampler options.
        with gr.Column():
            prompt = gr.Text(label="Prompt")
            negative_prompt = gr.Text(label="Negative Prompt")
            button = gr.Button("Generate")
            with gr.Accordion(open=False, label="Input Images (Optional)"):
                with gr.Group():
                    # Passed to predict() as `cond_mode`; only used there when
                    # an input image is supplied.
                    cond_mode = gr.Radio(
                        value="SDXL, Foreground",
                        choices=["SDXL, Foreground", "SDXL, Background"],
                        info="Whether to use input image as foreground or background",
                    )
                    remove_bg = gr.Checkbox(
                        info="Remove background using BriaRMBG",
                        label="Remove Background",
                        value=False,
                    )
                    # type="pil" matches the PIL image that predict() expects.
                    input_image = gr.Image(
                        label="Input Image",
                        type="pil",
                    )
            with gr.Accordion(open=False, label="Advanced Options"):
                with gr.Group():
                    with gr.Row():
                        # -1 makes predict() draw a random seed.
                        seed = gr.Slider(
                            label="Seed",
                            value=-1,
                            minimum=-1,
                            maximum=0xFFFFFFFFFFFFFFFF,
                            step=1,
                        )
                        # Read-only field that receives the seed returned by predict().
                        curr_seed = gr.Number(
                            value=-1, interactive=False, scale=0, label=" "
                        )
                    sampler_name = gr.Dropdown(
                        choices=samplers.KSampler.SAMPLERS,
                        label="Sampler Name",
                        value="dpmpp_2m_sde_gpu",
                    )
                    scheduler = gr.Dropdown(
                        choices=samplers.KSampler.SCHEDULERS,
                        label="Scheduler",
                        value="karras",
                    )
                    steps = gr.Slider(
                        label="Steps", value=20, minimum=1, maximum=50, step=1
                    )
                    cfg = gr.Number(
                        label="CFG", value=5.0, minimum=0.0, maximum=100.0, step=0.1
                    )
                    denoise = gr.Number(
                        label="Denoise", value=1.0, minimum=0.0, maximum=1.0, step=0.01
                    )
        # Output column: generated image plus a collapsible mask preview.
        with gr.Column():
            image = gr.Image()
            with gr.Accordion(label="Mask", open=False):
                mask = gr.Image()

    # Order must match the positional parameters of predict().
    inputs = [
        prompt,
        negative_prompt,
        input_image,
        remove_bg,
        cond_mode,
        seed,
        sampler_name,
        scheduler,
        steps,
        cfg,
        denoise,
    ]
    # Order must match the tuple returned by predict(): image, mask, seed.
    outputs = [image, mask, curr_seed]
    button.click(fn=predict, inputs=inputs, outputs=outputs)
    # Example rows supply only the first six inputs; predict_examples() fills in
    # the sampler settings. `examples` is imported from utils.
    gr.Examples(
        fn=predict_examples,
        examples=examples,
        inputs=[
            prompt,
            negative_prompt,
            input_image,
            remove_bg,
            cond_mode,
            seed,
        ],
        outputs=outputs,
        cache_examples=True,
    )

# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    blocks.launch()