ford442 committed on
Commit
fe5ba0f
·
1 Parent(s): 9c048b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -7
app.py CHANGED
@@ -19,9 +19,6 @@ from typing import Tuple
19
  #from transformers import AutoTokenizer, AutoModelForCausalLM
20
  import paramiko
21
 
22
- from xformers.ops import MemoryEfficientAttentionFlashAttentionOp
23
-
24
-
25
  #os.system("chmod +x ./cusparselt.sh")
26
  #os.system("./cusparselt.sh")
27
  #os.system("chmod +x ./cudnn.sh")
@@ -113,16 +110,14 @@ def load_and_prepare_model(model_id):
113
  "ford442/RealVisXL_V5.0_BF16": torch.bfloat16,
114
  }
115
  dtype = model_dtypes.get(model_id, torch.bfloat16) # Default to bfloat16 if not found
116
- #vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16", torch_dtype=torch.bfloat16,safety_checker=None)
117
  pipe = StableDiffusionXLPipeline.from_pretrained(
118
  model_id,
119
  torch_dtype=torch.bfloat16,
120
  add_watermarker=False,
121
  use_safetensors=True,
 
122
  ).to(torch.bfloat16).to('cuda')
123
- pipe.enable_xformers_memory_efficient_attention(attention_op=MemoryEfficientAttentionFlashAttentionOp)
124
- # Workaround for not accepting attention shape using VAE for Flash Attention
125
- pipe.vae.enable_xformers_memory_efficient_attention(attention_op=None)
126
 
127
  pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
128
  if ENABLE_CPU_OFFLOAD:
 
19
  #from transformers import AutoTokenizer, AutoModelForCausalLM
20
  import paramiko
21
 
 
 
 
22
  #os.system("chmod +x ./cusparselt.sh")
23
  #os.system("./cusparselt.sh")
24
  #os.system("chmod +x ./cudnn.sh")
 
110
  "ford442/RealVisXL_V5.0_BF16": torch.bfloat16,
111
  }
112
  dtype = model_dtypes.get(model_id, torch.bfloat16) # Default to bfloat16 if not found
113
+ vae = AutoencoderKL.from_pretrained("ford442/sdxl-vae-bf16", torch_dtype=torch.bfloat16,safety_checker=None)
114
  pipe = StableDiffusionXLPipeline.from_pretrained(
115
  model_id,
116
  torch_dtype=torch.bfloat16,
117
  add_watermarker=False,
118
  use_safetensors=True,
119
+ vae=vae,
120
  ).to(torch.bfloat16).to('cuda')
 
 
 
121
 
122
  pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
123
  if ENABLE_CPU_OFFLOAD: