Fix non-prequantized inference

Files changed (4)

cublas_linear.py DELETED Viewed

@@ -1 +0,0 @@
-from cublas_ops import CublasLinear

modules/flux_model.py CHANGED Viewed

@@ -369,7 +369,7 @@ class Flux(nn.Module):
     Transformer model for flow matching on sequences.
     """
-    def __init__(self, params: FluxParams, dtype: torch.dtype = torch.bfloat16):
+    def __init__(self, params: FluxParams, dtype: torch.dtype = torch.float16):
         super().__init__()
         self.dtype = dtype

modules/flux_model_f8.py CHANGED Viewed

@@ -370,7 +370,7 @@ class Flux(nn.Module):
     Transformer model for flow matching on sequences.
     """
-    def __init__(self, params: FluxParams, dtype: torch.dtype = torch.bfloat16):
+    def __init__(self, params: FluxParams, dtype: torch.dtype = torch.float16):
         super().__init__()
         self.dtype = dtype

util.py CHANGED Viewed

@@ -211,6 +211,8 @@ def load_flow_model(config: ModelSpec) -> Flux:
         sd = load_sft(ckpt_path, device="cpu")
         missing, unexpected = model.load_state_dict(sd, strict=False, assign=True)
         print_load_warning(missing, unexpected)
+        if not config.prequantized_flow:
+            model.type(into_dtype(config.flow_dtype))
     return model