Spaces:
Running
on
Zero
Running
on
Zero
import gradio as gr | |
import numpy as np | |
import random | |
import gc | |
import json | |
import torch | |
import spaces | |
from huggingface_hub import hf_hub_download | |
from diffusers import ( | |
AutoencoderKL, | |
SD3Transformer2DModel, | |
StableDiffusion3Pipeline, | |
FlowMatchEulerDiscreteScheduler | |
) | |
from diffusers.loaders.single_file_utils import ( | |
convert_sd3_transformer_checkpoint_to_diffusers, | |
) | |
from transformers import ( | |
CLIPTextModelWithProjection, | |
CLIPTokenizer, | |
T5EncoderModel, | |
T5Tokenizer | |
) | |
from accelerate import init_empty_weights | |
from accelerate.utils import set_module_tensor_to_device | |
from safetensors import safe_open | |
device = "cuda" if torch.cuda.is_available() else "cpu" | |
model_repo_id = "stabilityai/stable-diffusion-3.5-large" | |
finetune_repo_id = "DoctorDiffusion/Absynth-2.0" | |
finetune_filename = "Absynth_SD3.5L_2.0.safetensors" | |
if torch.cuda.is_available(): | |
torch_dtype = torch.bfloat16 | |
else: | |
torch_dtype = torch.float32 | |
# Initialize transformer | |
config_file = hf_hub_download(repo_id=model_repo_id, filename="transformer/config.json") | |
with open(config_file, "r") as fp: | |
config = json.load(fp) | |
with init_empty_weights(): | |
transformer = SD3Transformer2DModel.from_config(config) | |
# Get transformer state dict and load | |
model_file = hf_hub_download(repo_id=finetune_repo_id, filename=finetune_filename) | |
state_dict = {} | |
with safe_open(model_file, framework="pt") as f: | |
for key in f.keys(): | |
state_dict[key] = f.get_tensor(key) | |
state_dict = convert_sd3_transformer_checkpoint_to_diffusers(state_dict) | |
for key, value in state_dict.items(): | |
set_module_tensor_to_device( | |
transformer, | |
key, | |
device, | |
value=value, | |
dtype=torch_dtype | |
) | |
# Try to keep memory usage down | |
del state_dict | |
gc.collect() | |
# Initialize models from base SD3.5 | |
vae = AutoencoderKL.from_pretrained(model_repo_id, subfolder="vae") | |
text_encoder = CLIPTextModelWithProjection.from_pretrained(model_repo_id, subfolder="text_encoder") | |
text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(model_repo_id, subfolder="text_encoder_2") | |
text_encoder_3 = T5EncoderModel.from_pretrained(model_repo_id, subfolder="text_encoder_3") | |
tokenizer = CLIPTokenizer.from_pretrained(model_repo_id, subfolder="tokenizer") | |
tokenizer_2 = CLIPTokenizer.from_pretrained(model_repo_id, subfolder="tokenizer_2") | |
tokenizer_3 = T5Tokenizer.from_pretrained(model_repo_id, subfolder="tokenizer_3") | |
scheduler = FlowMatchEulerDiscreteScheduler.from_pretrained(model_repo_id, subfolder="scheduler") | |
# Create pipeline from our models | |
pipe = StableDiffusion3Pipeline( | |
vae=vae, | |
scheduler=scheduler, | |
text_encoder=text_encoder, | |
text_encoder_2=text_encoder_2, | |
text_encoder_3=text_encoder_3, | |
tokenizer=tokenizer, | |
tokenizer_2=tokenizer_2, | |
tokenizer_3=tokenizer_3, | |
transformer=transformer | |
) | |
pipe = pipe.to(device, dtype=torch_dtype) | |
MAX_SEED = np.iinfo(np.int32).max | |
MAX_IMAGE_SIZE = 1536 | |
def infer( | |
prompt, | |
negative_prompt="", | |
seed=42, | |
randomize_seed=False, | |
width=1024, | |
height=1024, | |
guidance_scale=4.5, | |
num_inference_steps=40, | |
progress=gr.Progress(track_tqdm=True), | |
): | |
if randomize_seed: | |
seed = random.randint(0, MAX_SEED) | |
generator = torch.Generator().manual_seed(seed) | |
image = pipe( | |
prompt=prompt, | |
negative_prompt=negative_prompt, | |
guidance_scale=guidance_scale, | |
num_inference_steps=num_inference_steps, | |
width=width, | |
height=height, | |
generator=generator, | |
).images[0] | |
return image, seed | |
examples = [ | |
"An astrounaut encounters an alien on the moon, photograph", | |
] | |
css = """ | |
#col-container { | |
margin: 0 auto; | |
max-width: 640px; | |
} | |
""" | |
with gr.Blocks(css=css) as demo: | |
with gr.Column(elem_id="col-container"): | |
gr.Markdown(" # [Absynth 2.0](https://huggingface.co/DoctorDiffusion/Absynth-2.0) by [DoctorDiffusion](https://civitai.com/user/doctor_diffusion)") | |
gr.Markdown("Finetuned from [Stable Diffusion 3.5 Large (8B)](https://huggingface.co/stabilityai/stable-diffusion-3.5-large) by [Stability AI](https://stability.ai/news/introducing-stable-diffusion-3-5).") | |
with gr.Row(): | |
prompt = gr.Text( | |
label="Prompt", | |
show_label=False, | |
max_lines=1, | |
placeholder="Enter your prompt", | |
container=False, | |
) | |
run_button = gr.Button("Run", scale=0, variant="primary") | |
result = gr.Image(label="Result", show_label=False) | |
with gr.Accordion("Advanced Settings", open=False): | |
negative_prompt = gr.Text( | |
label="Negative prompt", | |
max_lines=1, | |
placeholder="Enter a negative prompt", | |
) | |
seed = gr.Slider( | |
label="Seed", | |
minimum=0, | |
maximum=MAX_SEED, | |
step=1, | |
value=0, | |
) | |
randomize_seed = gr.Checkbox(label="Randomize seed", value=True) | |
with gr.Row(): | |
width = gr.Slider( | |
label="Width", | |
minimum=512, | |
maximum=MAX_IMAGE_SIZE, | |
step=32, | |
value=768, | |
) | |
height = gr.Slider( | |
label="Height", | |
minimum=512, | |
maximum=MAX_IMAGE_SIZE, | |
step=32, | |
value=1344, | |
) | |
with gr.Row(): | |
guidance_scale = gr.Slider( | |
label="Guidance scale", | |
minimum=0.0, | |
maximum=7.5, | |
step=0.1, | |
value=4.5, | |
) | |
num_inference_steps = gr.Slider( | |
label="Number of inference steps", | |
minimum=1, | |
maximum=50, | |
step=1, | |
value=40, | |
) | |
gr.Examples(examples=examples, inputs=[prompt], outputs=[result, seed], fn=infer, cache_examples=True, cache_mode="lazy") | |
gr.on( | |
triggers=[run_button.click, prompt.submit], | |
fn=infer, | |
inputs=[ | |
prompt, | |
negative_prompt, | |
seed, | |
randomize_seed, | |
width, | |
height, | |
guidance_scale, | |
num_inference_steps, | |
], | |
outputs=[result, seed], | |
) | |
if __name__ == "__main__": | |
demo.launch() | |