# preprocess.py ```py import PIL.Image import torch, gc from controlnet_aux_local import NormalBaeDetector#, CannyDetector class Preprocessor: MODEL_ID = "lllyasviel/Annotators" def __init__(self): self.model = None self.name = "" def load(self, name: str) -> None: if name == self.name: return elif name == "NormalBae": print("Loading NormalBae") self.model = NormalBaeDetector.from_pretrained(self.MODEL_ID).to("cuda") torch.cuda.empty_cache() self.name = name else: raise ValueError return def __call__(self, image: PIL.Image.Image, **kwargs) -> PIL.Image.Image: return self.model(image, **kwargs) ``` # app.py ```py prod = False port = 8080 show_options = False if prod: port = 8081 # show_options = False import os import random import time import gradio as gr import numpy as np import spaces import imageio from huggingface_hub import HfApi import gc import torch from PIL import Image from diffusers import ( ControlNetModel, DPMSolverMultistepScheduler, StableDiffusionControlNetPipeline, # AutoencoderKL, ) from controlnet_aux_local import NormalBaeDetector MAX_SEED = np.iinfo(np.int32).max API_KEY = os.environ.get("API_KEY", None) # os.environ['HF_HOME'] = '/data/.huggingface' print("CUDA version:", torch.version.cuda) print("loading everything") compiled = False api = HfApi() class Preprocessor: MODEL_ID = "lllyasviel/Annotators" def __init__(self): self.model = None self.name = "" def load(self, name: str) -> None: if name == self.name: return elif name == "NormalBae": print("Loading NormalBae") self.model = NormalBaeDetector.from_pretrained(self.MODEL_ID).to("cuda") torch.cuda.empty_cache() self.name = name else: raise ValueError return def __call__(self, image: Image.Image, **kwargs) -> Image.Image: return self.model(image, **kwargs) if gr.NO_RELOAD: # Controlnet Normal model_id = "lllyasviel/control_v11p_sd15_normalbae" print("initializing controlnet") controlnet = ControlNetModel.from_pretrained( model_id, torch_dtype=torch.float16, attn_implementation="flash_attention_2", ).to("cuda") # Scheduler scheduler = DPMSolverMultistepScheduler.from_pretrained( "runwayml/stable-diffusion-v1-5", solver_order=2, subfolder="scheduler", use_karras_sigmas=True, final_sigmas_type="sigma_min", algorithm_type="sde-dpmsolver++", prediction_type="epsilon", thresholding=False, denoise_final=True, device_map="cuda", torch_dtype=torch.float16, ) # Stable Diffusion Pipeline URL # base_model_url = "https://huggingface.co/broyang/hentaidigitalart_v20/blob/main/realcartoon3d_v15.safetensors" base_model_url = "https://huggingface.co/Lykon/AbsoluteReality/blob/main/AbsoluteReality_1.8.1_pruned.safetensors" # vae_url = "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/blob/main/vae-ft-mse-840000-ema-pruned.safetensors" # print('loading vae') # vae = AutoencoderKL.from_single_file(vae_url, torch_dtype=torch.float16).to("cuda") # vae.to(memory_format=torch.channels_last) print('loading pipe') pipe = StableDiffusionControlNetPipeline.from_single_file( base_model_url, safety_checker=None, controlnet=controlnet, scheduler=scheduler, # vae=vae, torch_dtype=torch.float16, ).to("cuda") print("loading preprocessor") preprocessor = Preprocessor() preprocessor.load("NormalBae") # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="EasyNegativeV2.safetensors", token="EasyNegativeV2",) # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="badhandv4.pt", token="badhandv4") # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="fcNeg-neg.pt", token="fcNeg-neg") # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="HDA_Ahegao.pt", token="HDA_Ahegao") # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="HDA_Bondage.pt", token="HDA_Bondage") # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="HDA_pet_play.pt", token="HDA_pet_play") # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="HDA_unconventional maid.pt", token="HDA_unconventional_maid") # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="HDA_NakedHoodie.pt", token="HDA_NakedHoodie") # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="HDA_NunDress.pt", token="HDA_NunDress") # pipe.load_textual_inversion("broyang/hentaidigitalart_v20", weight_name="HDA_Shibari.pt", token="HDA_Shibari") pipe.to("cuda") print("---------------Loaded controlnet pipeline---------------") torch.cuda.empty_cache() gc.collect() print(f"CUDA memory allocated: {torch.cuda.max_memory_allocated(device='cuda') / 1e9:.2f} GB") print("Model Compiled!") def randomize_seed_fn(seed: int, randomize_seed: bool) -> int: if randomize_seed: seed = random.randint(0, MAX_SEED) return seed def get_additional_prompt(): prompt = "hyperrealistic photography,extremely detailed,(intricate details),unity 8k wallpaper,ultra detailed" top = ["tank top", "blouse", "button up shirt", "sweater", "corset top"] bottom = ["short skirt", "athletic shorts", "jean shorts", "pleated skirt", "short skirt", "leggings", "high-waisted shorts"] accessory = ["knee-high boots", "gloves", "Thigh-high stockings", "Garter belt", "choker", "necklace", "headband", "headphones"] return f"{prompt}, {random.choice(top)}, {random.choice(bottom)}, {random.choice(accessory)}, score_9" # outfit = ["schoolgirl outfit", "playboy outfit", "red dress", "gala dress", "cheerleader outfit", "nurse outfit", "Kimono"] def get_prompt(prompt, additional_prompt): interior = "design-style interior designed (interior space),tungsten white balance,captured with a DSLR camera using f/10 aperture, 1/60 sec shutter speed, ISO 400, 20mm focal length" default = "hyperrealistic photography,extremely detailed,(intricate details),unity 8k wallpaper,ultra detailed" default2 = f"professional 3d model {prompt},octane render,highly detailed,volumetric,dramatic lighting,hyperrealistic photography,extremely detailed,(intricate details),unity 8k wallpaper,ultra detailed" randomize = get_additional_prompt() # nude = "NSFW,((nude)),medium bare breasts,hyperrealistic photography,extremely detailed,(intricate details),unity 8k wallpaper,ultra detailed" # bodypaint = "((fully naked with no clothes)),nude naked seethroughxray,invisiblebodypaint,rating_newd,NSFW" lab_girl = "hyperrealistic photography, extremely detailed, shy assistant wearing minidress boots and gloves, laboratory background, score_9, 1girl" pet_play = "hyperrealistic photography, extremely detailed, playful, blush, glasses, collar, score_9, HDA_pet_play" bondage = "hyperrealistic photography, extremely detailed, submissive, glasses, score_9, HDA_Bondage" # ahegao = "((invisible clothing)), hyperrealistic photography,exposed vagina,sexy,nsfw,HDA_Ahegao" ahegao2 = "(invisiblebodypaint),rating_newd,HDA_Ahegao" athleisure = "hyperrealistic photography, extremely detailed, 1girl athlete, exhausted embarrassed sweaty,outdoors, ((athleisure clothing)), score_9" atompunk = "((atompunk world)), hyperrealistic photography, extremely detailed, short hair, bodysuit, glasses, neon cyberpunk background, score_9" maid = "hyperrealistic photography, extremely detailed, shy, blushing, score_9, pastel background, HDA_unconventional_maid" nundress = "hyperrealistic photography, extremely detailed, shy, blushing, fantasy background, score_9, HDA_NunDress" naked_hoodie = "hyperrealistic photography, extremely detailed, medium hair, cityscape, (neon lights), score_9, HDA_NakedHoodie" abg = "(1girl, asian body covered in words, words on body, tattoos of (words) on body),(masterpiece, best quality),medium breasts,(intricate details),unity 8k wallpaper,ultra detailed,(pastel colors),beautiful and aesthetic,see-through (clothes),detailed,solo" # shibari = "extremely detailed, hyperrealistic photography, earrings, blushing, lace choker, tattoo, medium hair, score_9, HDA_Shibari" shibari2 = "octane render, highly detailed, volumetric, HDA_Shibari" if prompt == "": girls = [randomize, pet_play, bondage, lab_girl, athleisure, atompunk, maid, nundress, naked_hoodie, abg, shibari2, ahegao2] prompts_nsfw = [abg, shibari2, ahegao2] prompt = f"{random.choice(girls)}" prompt = f"boho chic" # print(f"-------------{preset}-------------") else: prompt = f"Photo from Pinterest of {prompt} {interior}" # prompt = default2 return f"{prompt} f{additional_prompt}" style_list = [ { "name": "None", "prompt": "" }, { "name": "Minimalistic", "prompt": "Minimalist interior design,clean lines,neutral colors,uncluttered space,functional furniture,lots of natural light" }, { "name": "Boho", "prompt": "Bohemian chic interior,eclectic mix of patterns and textures,vintage furniture,plants,woven textiles,warm earthy colors" }, { "name": "Farmhouse", "prompt": "Modern farmhouse interior,rustic wood elements,shiplap walls,neutral color palette,industrial accents,cozy textiles" }, { "name": "Saudi Prince", "prompt": "Opulent gold interior,luxurious ornate furniture,crystal chandeliers,rich fabrics,marble floors,intricate Arabic patterns" }, { "name": "Neoclassical", "prompt": "Neoclassical interior design,elegant columns,ornate moldings,symmetrical layout,refined furniture,muted color palette" }, { "name": "Eclectic", "prompt": "Eclectic interior design,mix of styles and eras,bold color combinations,diverse furniture pieces,unique art objects" }, { "name": "Parisian", "prompt": "Parisian apartment interior,all-white color scheme,ornate moldings,herringbone wood floors,elegant furniture,large windows" }, { "name": "Hollywood", "prompt": "Hollywood Regency interior,glamorous and luxurious,bold colors,mirrored surfaces,velvet upholstery,gold accents" }, { "name": "Scandinavian", "prompt": "Scandinavian interior design,light wood tones,white walls,minimalist furniture,cozy textiles,hygge atmosphere" }, { "name": "Beach", "prompt": "Coastal beach house interior,light blue and white color scheme,weathered wood,nautical accents,sheer curtains,ocean view" }, { "name": "Japanese", "prompt": "Traditional Japanese interior,tatami mats,shoji screens,low furniture,zen garden view,minimalist decor,natural materials" }, { "name": "Midcentury Modern", "prompt": "Mid-century modern interior,1950s-60s style furniture,organic shapes,warm wood tones,bold accent colors,large windows" }, { "name": "Retro Futurism", "prompt": "Neon (atompunk world) retro cyberpunk background", }, { "name": "Texan", "prompt": "Western cowboy interior,rustic wood beams,leather furniture,cowhide rugs,antler chandeliers,southwestern patterns" }, { "name": "Matrix", "prompt": "Futuristic cyberpunk interior,neon accent lighting,holographic plants,sleek black surfaces,advanced gaming setup,transparent screens,Blade Runner inspired decor,high-tech minimalist furniture" } ] styles = {k["name"]: (k["prompt"]) for k in style_list} STYLE_NAMES = list(styles.keys()) def apply_style(style_name): if style_name in styles: p = styles.get(style_name, "none") return p css = """ h1, h2, h3 { text-align: center; display: block; } footer { visibility: hidden; } .gradio-container { max-width: 1100px !important; } .gr-image { display: flex; justify-content: center; align-items: center; width: 100%; height: 512px; overflow: hidden; } .gr-image img { width: 100%; height: 100%; object-fit: cover; object-position: center; } """ with gr.Blocks(theme="bethecloud/storj_theme", css=css) as demo: ############################################################################# with gr.Row(): with gr.Accordion("Advanced options", open=show_options, visible=show_options): num_images = gr.Slider( label="Images", minimum=1, maximum=4, value=1, step=1 ) image_resolution = gr.Slider( label="Image resolution", minimum=256, maximum=1024, value=512, step=256, ) preprocess_resolution = gr.Slider( label="Preprocess resolution", minimum=128, maximum=1024, value=512, step=1, ) num_steps = gr.Slider( label="Number of steps", minimum=1, maximum=100, value=15, step=1 ) # 20/4.5 or 12 without lora, 4 with lora guidance_scale = gr.Slider( label="Guidance scale", minimum=0.1, maximum=30.0, value=5.5, step=0.1 ) # 5 without lora, 2 with lora seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0) randomize_seed = gr.Checkbox(label="Randomize seed", value=True) a_prompt = gr.Textbox( label="Additional prompt", value = "design-style interior designed (interior space), tungsten white balance, captured with a DSLR camera using f/10 aperture, 1/60 sec shutter speed, ISO 400, 20mm focal length" ) n_prompt = gr.Textbox( label="Negative prompt", value="EasyNegativeV2, fcNeg, (badhandv4:1.4), (worst quality, low quality, bad quality, normal quality:2.0), (bad hands, missing fingers, extra fingers:2.0)", ) ############################################################################# # input text with gr.Column(): prompt = gr.Textbox( label="Custom Design", placeholder="Enter a description (optional)", ) # design options with gr.Row(visible=True): style_selection = gr.Radio( show_label=True, container=True, interactive=True, choices=STYLE_NAMES, value="None", label="Design Styles", ) # input image with gr.Row(equal_height=True): with gr.Column(scale=1, min_width=300): image = gr.Image( label="Input", sources=["upload"], show_label=True, mirror_webcam=True, type="pil", ) # run button with gr.Column(): run_button = gr.Button(value="Use this one", size="lg", visible=False) # output image with gr.Column(scale=1, min_width=300): result = gr.Image( label="Output", interactive=False, type="pil", show_share_button= False, ) # Use this image button with gr.Column(): use_ai_button = gr.Button(value="Use this one", size="lg", visible=False) config = [ image, style_selection, prompt, a_prompt, n_prompt, num_images, image_resolution, preprocess_resolution, num_steps, guidance_scale, seed, ] with gr.Row(): helper_text = gr.Markdown("## Tap and hold (on mobile) to save the image.", visible=True) # image processing @gr.on(triggers=[image.upload, prompt.submit, run_button.click], inputs=config, outputs=result, show_progress="minimal") def auto_process_image(image, style_selection, prompt, a_prompt, n_prompt, num_images, image_resolution, preprocess_resolution, num_steps, guidance_scale, seed, progress=gr.Progress(track_tqdm=True)): return process_image(image, style_selection, prompt, a_prompt, n_prompt, num_images, image_resolution, preprocess_resolution, num_steps, guidance_scale, seed) # AI image processing @gr.on(triggers=[use_ai_button.click], inputs=[result] + config, outputs=[image, result], show_progress="minimal") def submit(previous_result, image, style_selection, prompt, a_prompt, n_prompt, num_images, image_resolution, preprocess_resolution, num_steps, guidance_scale, seed, progress=gr.Progress(track_tqdm=True)): # First, yield the previous result to update the input image immediately yield previous_result, gr.update() # Then, process the new input image new_result = process_image(previous_result, style_selection, prompt, a_prompt, n_prompt, num_images, image_resolution, preprocess_resolution, num_steps, guidance_scale, seed) # Finally, yield the new result yield previous_result, new_result # Turn off buttons when processing @gr.on(triggers=[image.upload, use_ai_button.click, run_button.click], inputs=None, outputs=[run_button, use_ai_button], show_progress="hidden") def turn_buttons_off(): return gr.update(visible=False), gr.update(visible=False) # Turn on buttons when processing is complete @gr.on(triggers=[result.change], inputs=None, outputs=[use_ai_button, run_button], show_progress="hidden") def turn_buttons_on(): return gr.update(visible=True), gr.update(visible=True) @spaces.GPU(duration=12) @torch.inference_mode() def process_image( image, style_selection, prompt, a_prompt, n_prompt, num_images, image_resolution, preprocess_resolution, num_steps, guidance_scale, seed, ): preprocess_start = time.time() print("processing image") seed = random.randint(0, MAX_SEED) generator = torch.cuda.manual_seed(seed) preprocessor.load("NormalBae") control_image = preprocessor( image=image, image_resolution=image_resolution, detect_resolution=preprocess_resolution, ) preprocess_time = time.time() - preprocess_start if style_selection is not None or style_selection != "None": prompt = "Photo from Pinterest of " + apply_style(style_selection) + " " + prompt + "," + a_prompt else: prompt=str(get_prompt(prompt, a_prompt)) negative_prompt=str(n_prompt) print(prompt) print(f"\n-------------------------Preprocess done in: {preprocess_time:.2f} seconds-------------------------") start = time.time() results = pipe( prompt=prompt, negative_prompt=negative_prompt, guidance_scale=guidance_scale, num_images_per_prompt=num_images, num_inference_steps=num_steps, generator=generator, image=control_image, ).images[0] print(f"\n-------------------------Inference done in: {time.time() - start:.2f} seconds-------------------------") torch.cuda.empty_cache() # upload block timestamp = int(time.time()) img_path = f"{timestamp}.jpg" results_path = f"{timestamp}_out.jpg" imageio.imsave(img_path, image) imageio.imsave(results_path, results) api.upload_file( path_or_fileobj=img_path, path_in_repo=img_path, repo_id="broyang/interior-ai-outputs", repo_type="dataset", token=API_KEY, run_as_future=True, ) api.upload_file( path_or_fileobj=results_path, path_in_repo=results_path, repo_id="broyang/interior-ai-outputs", repo_type="dataset", token=API_KEY, run_as_future=True, ) return results if prod: demo.queue(max_size=20).launch(server_name="localhost", server_port=port) else: demo.queue(api_open=False).launch(show_api=False) ``` # .aidigestignore ``` controlnet_aux_local/normalbae/* requirements.txt win.requirements.txt web.html client.py local_app.py README.md Dockerfile .gitignore .gitattributes ``` # controlnet_aux_local/util.py ```py import os import random import cv2 import numpy as np import torch annotator_ckpts_path = os.path.join(os.path.dirname(__file__), 'ckpts') def HWC3(x): assert x.dtype == np.uint8 if x.ndim == 2: x = x[:, :, None] assert x.ndim == 3 H, W, C = x.shape assert C == 1 or C == 3 or C == 4 if C == 3: return x if C == 1: return np.concatenate([x, x, x], axis=2) if C == 4: color = x[:, :, 0:3].astype(np.float32) alpha = x[:, :, 3:4].astype(np.float32) / 255.0 y = color * alpha + 255.0 * (1.0 - alpha) y = y.clip(0, 255).astype(np.uint8) return y def make_noise_disk(H, W, C, F): noise = np.random.uniform(low=0, high=1, size=((H // F) + 2, (W // F) + 2, C)) noise = cv2.resize(noise, (W + 2 * F, H + 2 * F), interpolation=cv2.INTER_CUBIC) noise = noise[F: F + H, F: F + W] noise -= np.min(noise) noise /= np.max(noise) if C == 1: noise = noise[:, :, None] return noise def nms(x, t, s): x = cv2.GaussianBlur(x.astype(np.float32), (0, 0), s) f1 = np.array([[0, 0, 0], [1, 1, 1], [0, 0, 0]], dtype=np.uint8) f2 = np.array([[0, 1, 0], [0, 1, 0], [0, 1, 0]], dtype=np.uint8) f3 = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=np.uint8) f4 = np.array([[0, 0, 1], [0, 1, 0], [1, 0, 0]], dtype=np.uint8) y = np.zeros_like(x) for f in [f1, f2, f3, f4]: np.putmask(y, cv2.dilate(x, kernel=f) == x, x) z = np.zeros_like(y, dtype=np.uint8) z[y > t] = 255 return z def min_max_norm(x): x -= np.min(x) x /= np.maximum(np.max(x), 1e-5) return x def safe_step(x, step=2): y = x.astype(np.float32) * float(step + 1) y = y.astype(np.int32).astype(np.float32) / float(step) return y def img2mask(img, H, W, low=10, high=90): assert img.ndim == 3 or img.ndim == 2 assert img.dtype == np.uint8 if img.ndim == 3: y = img[:, :, random.randrange(0, img.shape[2])] else: y = img y = cv2.resize(y, (W, H), interpolation=cv2.INTER_CUBIC) if random.uniform(0, 1) < 0.5: y = 255 - y return y < np.percentile(y, random.randrange(low, high)) def resize_image(input_image, resolution): H, W, C = input_image.shape H = float(H) W = float(W) k = float(resolution) / min(H, W) H *= k W *= k H = int(np.round(H / 64.0)) * 64 W = int(np.round(W / 64.0)) * 64 img = cv2.resize(input_image, (W, H), interpolation=cv2.INTER_LANCZOS4 if k > 1 else cv2.INTER_AREA) return img def torch_gc(): if torch.cuda.is_available(): torch.cuda.empty_cache() torch.cuda.ipc_collect() def ade_palette(): """ADE20K palette that maps each class to RGB values.""" return [[120, 120, 120], [180, 120, 120], [6, 230, 230], [80, 50, 50], [4, 200, 3], [120, 120, 80], [140, 140, 140], [204, 5, 255], [230, 230, 230], [4, 250, 7], [224, 5, 255], [235, 255, 7], [150, 5, 61], [120, 120, 70], [8, 255, 51], [255, 6, 82], [143, 255, 140], [204, 255, 4], [255, 51, 7], [204, 70, 3], [0, 102, 200], [61, 230, 250], [255, 6, 51], [11, 102, 255], [255, 7, 71], [255, 9, 224], [9, 7, 230], [220, 220, 220], [255, 9, 92], [112, 9, 255], [8, 255, 214], [7, 255, 224], [255, 184, 6], [10, 255, 71], [255, 41, 10], [7, 255, 255], [224, 255, 8], [102, 8, 255], [255, 61, 6], [255, 194, 7], [255, 122, 8], [0, 255, 20], [255, 8, 41], [255, 5, 153], [6, 51, 255], [235, 12, 255], [160, 150, 20], [0, 163, 255], [140, 140, 140], [250, 10, 15], [20, 255, 0], [31, 255, 0], [255, 31, 0], [255, 224, 0], [153, 255, 0], [0, 0, 255], [255, 71, 0], [0, 235, 255], [0, 173, 255], [31, 0, 255], [11, 200, 200], [255, 82, 0], [0, 255, 245], [0, 61, 255], [0, 255, 112], [0, 255, 133], [255, 0, 0], [255, 163, 0], [255, 102, 0], [194, 255, 0], [0, 143, 255], [51, 255, 0], [0, 82, 255], [0, 255, 41], [0, 255, 173], [10, 0, 255], [173, 255, 0], [0, 255, 153], [255, 92, 0], [255, 0, 255], [255, 0, 245], [255, 0, 102], [255, 173, 0], [255, 0, 20], [255, 184, 184], [0, 31, 255], [0, 255, 61], [0, 71, 255], [255, 0, 204], [0, 255, 194], [0, 255, 82], [0, 10, 255], [0, 112, 255], [51, 0, 255], [0, 194, 255], [0, 122, 255], [0, 255, 163], [255, 153, 0], [0, 255, 10], [255, 112, 0], [143, 255, 0], [82, 0, 255], [163, 255, 0], [255, 235, 0], [8, 184, 170], [133, 0, 255], [0, 255, 92], [184, 0, 255], [255, 0, 31], [0, 184, 255], [0, 214, 255], [255, 0, 112], [92, 255, 0], [0, 224, 255], [112, 224, 255], [70, 184, 160], [163, 0, 255], [153, 0, 255], [71, 255, 0], [255, 0, 163], [255, 204, 0], [255, 0, 143], [0, 255, 235], [133, 255, 0], [255, 0, 235], [245, 0, 255], [255, 0, 122], [255, 245, 0], [10, 190, 212], [214, 255, 0], [0, 204, 255], [20, 0, 255], [255, 255, 0], [0, 153, 255], [0, 41, 255], [0, 255, 204], [41, 0, 255], [41, 255, 0], [173, 0, 255], [0, 245, 255], [71, 0, 255], [122, 0, 255], [0, 255, 184], [0, 92, 255], [184, 255, 0], [0, 133, 255], [255, 214, 0], [25, 194, 194], [102, 255, 0], [92, 0, 255]] ``` # controlnet_aux_local/processor.py ```py """ This file contains a Processor that can be used to process images with controlnet aux processors """ import io import logging from typing import Dict, Optional, Union from PIL import Image from controlnet_aux_local import (CannyDetector, ContentShuffleDetector, HEDdetector, LeresDetector, LineartAnimeDetector, LineartDetector, MediapipeFaceDetector, MidasDetector, MLSDdetector, NormalBaeDetector, OpenposeDetector, PidiNetDetector, ZoeDetector, DWposeDetector) LOGGER = logging.getLogger(__name__) MODELS = { # checkpoint models 'scribble_hed': {'class': HEDdetector, 'checkpoint': True}, 'softedge_hed': {'class': HEDdetector, 'checkpoint': True}, 'scribble_hedsafe': {'class': HEDdetector, 'checkpoint': True}, 'softedge_hedsafe': {'class': HEDdetector, 'checkpoint': True}, 'depth_midas': {'class': MidasDetector, 'checkpoint': True}, 'mlsd': {'class': MLSDdetector, 'checkpoint': True}, 'openpose': {'class': OpenposeDetector, 'checkpoint': True}, 'openpose_face': {'class': OpenposeDetector, 'checkpoint': True}, 'openpose_faceonly': {'class': OpenposeDetector, 'checkpoint': True}, 'openpose_full': {'class': OpenposeDetector, 'checkpoint': True}, 'openpose_hand': {'class': OpenposeDetector, 'checkpoint': True}, 'dwpose': {'class': DWposeDetector, 'checkpoint': True}, 'scribble_pidinet': {'class': PidiNetDetector, 'checkpoint': True}, 'softedge_pidinet': {'class': PidiNetDetector, 'checkpoint': True}, 'scribble_pidsafe': {'class': PidiNetDetector, 'checkpoint': True}, 'softedge_pidsafe': {'class': PidiNetDetector, 'checkpoint': True}, 'normal_bae': {'class': NormalBaeDetector, 'checkpoint': True}, 'lineart_coarse': {'class': LineartDetector, 'checkpoint': True}, 'lineart_realistic': {'class': LineartDetector, 'checkpoint': True}, 'lineart_anime': {'class': LineartAnimeDetector, 'checkpoint': True}, 'depth_zoe': {'class': ZoeDetector, 'checkpoint': True}, 'depth_leres': {'class': LeresDetector, 'checkpoint': True}, 'depth_leres++': {'class': LeresDetector, 'checkpoint': True}, # instantiate 'shuffle': {'class': ContentShuffleDetector, 'checkpoint': False}, 'mediapipe_face': {'class': MediapipeFaceDetector, 'checkpoint': False}, 'canny': {'class': CannyDetector, 'checkpoint': False}, } MODEL_PARAMS = { 'scribble_hed': {'scribble': True}, 'softedge_hed': {'scribble': False}, 'scribble_hedsafe': {'scribble': True, 'safe': True}, 'softedge_hedsafe': {'scribble': False, 'safe': True}, 'depth_midas': {}, 'mlsd': {}, 'openpose': {'include_body': True, 'include_hand': False, 'include_face': False}, 'openpose_face': {'include_body': True, 'include_hand': False, 'include_face': True}, 'openpose_faceonly': {'include_body': False, 'include_hand': False, 'include_face': True}, 'openpose_full': {'include_body': True, 'include_hand': True, 'include_face': True}, 'openpose_hand': {'include_body': False, 'include_hand': True, 'include_face': False}, 'dwpose': {}, 'scribble_pidinet': {'safe': False, 'scribble': True}, 'softedge_pidinet': {'safe': False, 'scribble': False}, 'scribble_pidsafe': {'safe': True, 'scribble': True}, 'softedge_pidsafe': {'safe': True, 'scribble': False}, 'normal_bae': {}, 'lineart_realistic': {'coarse': False}, 'lineart_coarse': {'coarse': True}, 'lineart_anime': {}, 'canny': {}, 'shuffle': {}, 'depth_zoe': {}, 'depth_leres': {'boost': False}, 'depth_leres++': {'boost': True}, 'mediapipe_face': {}, } CHOICES = f"Choices for the processor are {list(MODELS.keys())}" class Processor: def __init__(self, processor_id: str, params: Optional[Dict] = None) -> None: """Processor that can be used to process images with controlnet aux processors Args: processor_id (str): processor name, options are 'hed, midas, mlsd, openpose, pidinet, normalbae, lineart, lineart_coarse, lineart_anime, canny, content_shuffle, zoe, mediapipe_face params (Optional[Dict]): parameters for the processor """ LOGGER.info(f"Loading {processor_id}") if processor_id not in MODELS: raise ValueError(f"{processor_id} is not a valid processor id. Please make sure to choose one of {', '.join(MODELS.keys())}") self.processor_id = processor_id self.processor = self.load_processor(self.processor_id) # load default params self.params = MODEL_PARAMS[self.processor_id] # update with user params if params: self.params.update(params) def load_processor(self, processor_id: str) -> 'Processor': """Load controlnet aux processors Args: processor_id (str): processor name Returns: Processor: controlnet aux processor """ processor = MODELS[processor_id]['class'] # check if the proecssor is a checkpoint model if MODELS[processor_id]['checkpoint']: processor = processor.from_pretrained("lllyasviel/Annotators") else: processor = processor() return processor def __call__(self, image: Union[Image.Image, bytes], to_pil: bool = True) -> Union[Image.Image, bytes]: """processes an image with a controlnet aux processor Args: image (Union[Image.Image, bytes]): input image in bytes or PIL Image to_pil (bool): whether to return bytes or PIL Image Returns: Union[Image.Image, bytes]: processed image in bytes or PIL Image """ # check if bytes or PIL Image if isinstance(image, bytes): image = Image.open(io.BytesIO(image)).convert("RGB") processed_image = self.processor(image, **self.params) if to_pil: return processed_image else: output_bytes = io.BytesIO() processed_image.save(output_bytes, format='JPEG') return output_bytes.getvalue() ``` # controlnet_aux_local/__init__.py ```py __version__ = "0.0.8" # from .hed import HEDdetector # from .leres import LeresDetector # from .lineart import LineartDetector # from .lineart_anime import LineartAnimeDetector # from .midas import MidasDetector # from .mlsd import MLSDdetector from .normalbae import NormalBaeDetector # from .open_pose import OpenposeDetector # from .pidi import PidiNetDetector # from .zoe import ZoeDetector # from .canny import CannyDetector # from .mediapipe_face import MediapipeFaceDetector # from .segment_anything import SamDetector # from .shuffle import ContentShuffleDetector # from .dwpose import DWposeDetector ```