Spaces:
vilarin
/
Running on Zero

vilarin committed on
Commit
9cdf1dd
·
verified ·
1 Parent(s): 4429dd4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -6
app.py CHANGED
@@ -13,7 +13,7 @@ from diffusers.utils import load_image
13
  from PIL import Image
14
  import requests
15
  import transformers
16
- from transformers import AutoTokenizer, T5EncoderModel
17
  from translatepy import Translator
18
 
19
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
@@ -53,10 +53,12 @@ transformer = SD3Transformer2DModel.from_pretrained(
53
  torch_dtype=torch.float16,
54
  )
55
 
 
 
56
  text_encoder_3 = T5EncoderModel.from_pretrained(
57
  repo,
58
  subfolder="text_encoder_3",
59
- torch_dtype=torch.float16,
60
  )
61
 
62
  tokenizer_3 = AutoTokenizer.from_pretrained(
@@ -65,23 +67,44 @@ tokenizer_3 = AutoTokenizer.from_pretrained(
65
  torch_dtype=torch.float16,
66
  )
67
 
 
 
 
 
 
 
 
68
  # Ensure model and scheduler are initialized in GPU-enabled function
69
  if torch.cuda.is_available():
70
  pipe = StableDiffusion3Pipeline.from_pretrained(
71
  repo,
72
- vae=vae,
73
- transformer=transformer,
74
  tokenizer_3=tokenizer_3,
75
  text_encoder_3=text_encoder_3,
76
  torch_dtype=torch.float16).to("cuda")
77
  pipe2 = StableDiffusion3Img2ImgPipeline.from_pretrained(
78
  repo,
79
- vae=vae,
80
- transformer=transformer,
81
  tokenizer_3=tokenizer_3,
82
  text_encoder_3=text_encoder_3,
83
  torch_dtype=torch.float16).to("cuda")
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
  pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(pipe.scheduler.config)
86
  pipe2.scheduler = FlowMatchEulerDiscreteScheduler.from_config(pipe2.scheduler.config)
87
 
 
13
  from PIL import Image
14
  import requests
15
  import transformers
16
+ from transformers import AutoTokenizer, T5EncoderModel, BitsAndBytesConfig
17
  from translatepy import Translator
18
 
19
  os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
 
53
  torch_dtype=torch.float16,
54
  )
55
 
56
+ quantization_config = BitsAndBytesConfig(load_in_8bit=True)
57
+
58
  text_encoder_3 = T5EncoderModel.from_pretrained(
59
  repo,
60
  subfolder="text_encoder_3",
61
+ quantization_config=quantization_config,
62
  )
63
 
64
  tokenizer_3 = AutoTokenizer.from_pretrained(
 
67
  torch_dtype=torch.float16,
68
  )
69
 
70
+ torch.set_float32_matmul_precision("high")
71
+
72
+ torch._inductor.config.conv_1x1_as_mm = True
73
+ torch._inductor.config.coordinate_descent_tuning = True
74
+ torch._inductor.config.epilogue_fusion = False
75
+ torch._inductor.config.coordinate_descent_check_all_directions = True
76
+
77
  # Ensure model and scheduler are initialized in GPU-enabled function
78
  if torch.cuda.is_available():
79
  pipe = StableDiffusion3Pipeline.from_pretrained(
80
  repo,
 
 
81
  tokenizer_3=tokenizer_3,
82
  text_encoder_3=text_encoder_3,
83
  torch_dtype=torch.float16).to("cuda")
84
  pipe2 = StableDiffusion3Img2ImgPipeline.from_pretrained(
85
  repo,
 
 
86
  tokenizer_3=tokenizer_3,
87
  text_encoder_3=text_encoder_3,
88
  torch_dtype=torch.float16).to("cuda")
89
 
90
+ pipe.set_progress_bar_config(disable=True)
91
+
92
+ pipe.transformer.to(memory_format=torch.channels_last)
93
+ pipe.vae.to(memory_format=torch.channels_last)
94
+
95
+ pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
96
+ pipe.vae.decode = torch.compile(pipe.vae.decode, mode="max-autotune", fullgraph=True)
97
+
98
+
99
+ pipe2.set_progress_bar_config(disable=True)
100
+
101
+ pipe2.transformer.to(memory_format=torch.channels_last)
102
+ pipe2.vae.to(memory_format=torch.channels_last)
103
+
104
+ pipe2.transformer = torch.compile(pipe2.transformer, mode="max-autotune", fullgraph=True)
105
+ pipe2.vae.decode = torch.compile(pipe2.vae.decode, mode="max-autotune", fullgraph=True)
106
+
107
+
108
  pipe.scheduler = FlowMatchEulerDiscreteScheduler.from_config(pipe.scheduler.config)
109
  pipe2.scheduler = FlowMatchEulerDiscreteScheduler.from_config(pipe2.scheduler.config)
110