Add fields to configs, fix issue with offload from bnb, remove extra random test code

Browse files

Files changed (11) hide show

configs/config-dev-cuda0.json +6 -1
configs/config-dev-eval.json +4 -2
configs/config-dev-offload.json +1 -1
configs/config-dev-prequant.json +6 -5
configs/config-dev.json +5 -3
configs/config-schnell-cuda0.json +8 -2
configs/config-schnell.json +8 -2
float8_quantize.py +1 -1
flux_pipeline.py +2 -27
image_encoder.py +0 -26
util.py +0 -22

configs/config-dev-cuda0.json CHANGED Viewed

@@ -47,5 +47,10 @@
   "flow_dtype": "float16",
   "ae_dtype": "bfloat16",
   "text_enc_dtype": "bfloat16",
-  "num_to_quant": 20
 }

   "flow_dtype": "float16",
   "ae_dtype": "bfloat16",
   "text_enc_dtype": "bfloat16",
+  "text_enc_quantization_dtype": "qfloat8",
+  "compile_extras": false,
+  "compile_blocks": false,
+  "offload_ae": false,
+  "offload_text_enc": false,
+  "offload_flow": false
 }

configs/config-dev-eval.json CHANGED Viewed

@@ -49,7 +49,9 @@
   "text_enc_dtype": "bfloat16",
   "flow_quantization_dtype": "qfloat8",
   "text_enc_quantization_dtype": "qfloat8",
-  "num_to_quant": 22,
   "compile_extras": false,
-  "compile_blocks": false
 }

   "text_enc_dtype": "bfloat16",
   "flow_quantization_dtype": "qfloat8",
   "text_enc_quantization_dtype": "qfloat8",
   "compile_extras": false,
+  "compile_blocks": false,
+  "offload_ae": false,
+  "offload_text_enc": false,
+  "offload_flow": false
 }

configs/config-dev-offload.json CHANGED Viewed

@@ -49,7 +49,7 @@
   "text_enc_dtype": "bfloat16",
   "flow_quantization_dtype": "qfloat8",
   "text_enc_quantization_dtype": "qint4",
-  "num_to_quant": 22,
   "compile_extras": false,
   "compile_blocks": false,
   "offload_text_encoder": true,

   "text_enc_dtype": "bfloat16",
   "flow_quantization_dtype": "qfloat8",
   "text_enc_quantization_dtype": "qint4",
+  "ae_quantization_dtype": "qfloat8",
   "compile_extras": false,
   "compile_blocks": false,
   "offload_text_encoder": true,

configs/config-dev-prequant.json CHANGED Viewed

@@ -47,10 +47,11 @@
   "flow_dtype": "float16",
   "ae_dtype": "bfloat16",
   "text_enc_dtype": "bfloat16",
-  "flow_quantization_dtype": "qfloat8",
   "text_enc_quantization_dtype": "qfloat8",
-  "num_to_quant": 22,
-  "compile_extras": true,
-  "compile_blocks": true,
-  "prequantized_flow": true
 }

   "flow_dtype": "float16",
   "ae_dtype": "bfloat16",
   "text_enc_dtype": "bfloat16",
   "text_enc_quantization_dtype": "qfloat8",
+  "compile_extras": false,
+  "compile_blocks": false,
+  "prequantized_flow": true,
+  "offload_ae": false,
+  "offload_text_enc": false,
+  "offload_flow": false
 }

configs/config-dev.json CHANGED Viewed

@@ -47,9 +47,11 @@
   "flow_dtype": "float16",
   "ae_dtype": "bfloat16",
   "text_enc_dtype": "bfloat16",
-  "flow_quantization_dtype": "qfloat8",
   "text_enc_quantization_dtype": "qfloat8",
-  "num_to_quant": 22,
   "compile_extras": true,
-  "compile_blocks": true
 }

   "flow_dtype": "float16",
   "ae_dtype": "bfloat16",
   "text_enc_dtype": "bfloat16",
   "text_enc_quantization_dtype": "qfloat8",
+  "ae_quantization_dtype": "qfloat8",
   "compile_extras": true,
+  "compile_blocks": true,
+  "offload_ae": false,
+  "offload_text_enc": false,
+  "offload_flow": false
 }

configs/config-schnell-cuda0.json CHANGED Viewed

@@ -16,7 +16,7 @@
     ],
     "theta": 10000,
     "qkv_bias": true,
-    "guidance_embed": true
   },
   "ae_params": {
     "resolution": 256,
@@ -47,5 +47,11 @@
   "flow_dtype": "float16",
   "ae_dtype": "bfloat16",
   "text_enc_dtype": "bfloat16",
-  "num_to_quant": 20
 }

     ],
     "theta": 10000,
     "qkv_bias": true,
+    "guidance_embed": false
   },
   "ae_params": {
     "resolution": 256,
   "flow_dtype": "float16",
   "ae_dtype": "bfloat16",
   "text_enc_dtype": "bfloat16",
+  "text_enc_quantization_dtype": "qfloat8",
+  "ae_quantization_dtype": "qfloat8",
+  "compile_extras": false,
+  "compile_blocks": false,
+  "offload_ae": false,
+  "offload_text_enc": false,
+  "offload_flow": false
 }

configs/config-schnell.json CHANGED Viewed

@@ -16,7 +16,7 @@
     ],
     "theta": 10000,
     "qkv_bias": true,
-    "guidance_embed": true
   },
   "ae_params": {
     "resolution": 256,
@@ -47,5 +47,11 @@
   "flow_dtype": "float16",
   "ae_dtype": "bfloat16",
   "text_enc_dtype": "bfloat16",
-  "num_to_quant": 20
 }

     ],
     "theta": 10000,
     "qkv_bias": true,
+    "guidance_embed": false
   },
   "ae_params": {
     "resolution": 256,
   "flow_dtype": "float16",
   "ae_dtype": "bfloat16",
   "text_enc_dtype": "bfloat16",
+  "text_enc_quantization_dtype": "qfloat8",
+  "ae_quantization_dtype": "qfloat8",
+  "compile_extras": true,
+  "compile_blocks": true,
+  "offload_ae": false,
+  "offload_text_enc": false,
+  "offload_flow": false
 }

float8_quantize.py CHANGED Viewed

@@ -12,7 +12,7 @@ from torch.compiler import is_compiling
 from torch import __version__
 from torch.version import cuda
-IS_TORCH_2_4 = __version__ >= (2, 4) and __version__ < (2, 4, 9)
 LT_TORCH_2_4 = __version__ < (2, 4)
 if LT_TORCH_2_4:
     if not hasattr(torch, "_scaled_mm"):

 from torch import __version__
 from torch.version import cuda
+IS_TORCH_2_4 = __version__ < (2, 4, 9)
 LT_TORCH_2_4 = __version__ < (2, 4)
 if LT_TORCH_2_4:
     if not hasattr(torch, "_scaled_mm"):

flux_pipeline.py CHANGED Viewed

@@ -165,8 +165,8 @@ class FluxPipeline:
         img_ids = img_ids[None].repeat(bs, 1, 1, 1).flatten(1, 2)
         if self.offload_text_encoder:
-            self.clip.to(self.device_clip)
-            self.t5.to(self.device_t5)
         vec, txt, txt_ids = get_weighted_text_embeddings_flux(
             self,
             prompt,
@@ -498,28 +498,3 @@ class FluxPipeline:
             t5_device=t5_device,
             config=config,
         )
-if __name__ == "__main__":
-    pipe = FluxPipeline.load_pipeline_from_config_path(
-        "configs/config-dev-prequant.json",
-    )
-    o = pipe.generate(
-        prompt="Street photography portrait of a beautiful asian woman in traditional clothing with golden hairpin and blue eyes, wearing a red kimono with dragon patterns",
-        height=1024,
-        width=576,
-        num_steps=24,
-        guidance=3.5,
-        seed=10,
-    )
-    open("out.jpg", "wb").write(o.read())
-    for x in range(2):
-        o = pipe.generate(
-            prompt="Street photography portrait of a beautiful asian woman in traditional clothing with golden hairpin and blue eyes, wearing a red kimono with dragon patterns",
-            height=1024,
-            width=576,
-            num_steps=24,
-            guidance=3.5,
-        )
-        open(f"out{x}.jpg", "wb").write(o.read())

         img_ids = img_ids[None].repeat(bs, 1, 1, 1).flatten(1, 2)
         if self.offload_text_encoder:
+            self.clip.cuda(self.device_clip)
+            self.t5.cuda(self.device_t5)
         vec, txt, txt_ids = get_weighted_text_embeddings_flux(
             self,
             prompt,
             t5_device=t5_device,
             config=config,
         )

image_encoder.py CHANGED Viewed

@@ -42,29 +42,3 @@ class ImageEncoder:
         im.save(iob, format="JPEG", quality=95)
         iob.seek(0)
         return iob.getvalue()
-def test_real_img():
-    from PIL import Image
-    import numpy as np
-    im = "out.jpg"
-    im = Image.open(im)
-    im = np.array(im)
-    img_hwc = torch.from_numpy(im).cuda().type(torch.float32)
-    img_chw = img_hwc.permute(2, 0, 1).contiguous()
-    img_gray = img_hwc.mean(dim=2, keepdim=False).contiguous().clamp(0, 255)
-    tj = ImageEncoder()
-    o = tj.encode_torch(img_chw)
-    o2 = tj.encode_torch(img_hwc)
-    o3 = tj.encode_torch(img_gray)
-    with open("out_chw.jpg", "wb") as f:
-        f.write(o2)
-    with open("out_hwc.jpg", "wb") as f:
-        f.write(o)
-    with open("out_gray.jpg", "wb") as f:
-        f.write(o3)
-if __name__ == "__main__":
-    test_real_img()

         im.save(iob, format="JPEG", quality=95)
         iob.seek(0)
         return iob.getvalue()

util.py CHANGED Viewed

@@ -290,25 +290,3 @@ def load_models_from_config(config: ModelSpec) -> LoadedModels:
         t5=t5,
         config=config,
     )
-if __name__ == "__main__":
-    p = "/big/generator-ui/flux-testing/flux/model-dir/flux1-dev.sft"
-    ae_p = "/big/generator-ui/flux-testing/flux/model-dir/ae.sft"
-    config = load_config(
-        ModelVersion.flux_dev,
-        flux_path=p,
-        ae_path=ae_p,
-        text_enc_path="city96/t5-v1_1-xxl-encoder-bf16",
-        text_enc_device="cuda:0",
-        ae_device="cuda:0",
-        flux_device="cuda:0",
-        flow_dtype="float16",
-        ae_dtype="bfloat16",
-        text_enc_dtype="bfloat16",
-        num_to_quant=20,
-    )
-    with open("configs/config-dev-cuda0.json", "w") as f:
-        json.dump(config.model_dump(), f, indent=2)
-    print(config)

         t5=t5,
         config=config,
     )