Initial commit with folder contents
Browse files
- pyproject.toml +10 -6
- src/pipeline.py +104 -21
pyproject.toml
CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
4 |
|
5 |
[project]
|
6 |
name = "flux-schnell-edge-inference"
|
7 |
-
description = "
|
8 |
requires-python = ">=3.10,<3.13"
|
9 |
version = "8"
|
10 |
dependencies = [
|
@@ -24,15 +24,19 @@ dependencies = [
|
|
24 |
[[tool.edge-maxxing.models]]
|
25 |
repository = "black-forest-labs/FLUX.1-schnell"
|
26 |
revision = "741f7c3ce8b383c54771c7003378a50191e9efe9"
|
27 |
-
exclude = ["transformer"]
|
28 |
|
29 |
[[tool.edge-maxxing.models]]
|
30 |
-
repository = "TrendForge/
|
31 |
-
revision = "
|
32 |
|
33 |
[[tool.edge-maxxing.models]]
|
34 |
-
repository = "TrendForge/
|
35 |
-
revision = "
|
|
|
|
|
|
|
|
|
36 |
|
37 |
[project.scripts]
|
38 |
start_inference = "main:main"
|
|
|
4 |
|
5 |
[project]
|
6 |
name = "flux-schnell-edge-inference"
|
7 |
+
description = "HitmanReborn"
|
8 |
requires-python = ">=3.10,<3.13"
|
9 |
version = "8"
|
10 |
dependencies = [
|
|
|
24 |
[[tool.edge-maxxing.models]]
|
25 |
repository = "black-forest-labs/FLUX.1-schnell"
|
26 |
revision = "741f7c3ce8b383c54771c7003378a50191e9efe9"
|
27 |
+
exclude = ["transformer", "vae", "text_encoder_2"]
|
28 |
|
29 |
[[tool.edge-maxxing.models]]
|
30 |
+
repository = "TrendForge/extra0manQ0"
|
31 |
+
revision = "dc2cda167b8f53792a98020a3ef2f21808b09bb4"
|
32 |
|
33 |
[[tool.edge-maxxing.models]]
|
34 |
+
repository = "TrendForge/extra1manQ1"
|
35 |
+
revision = "d302b6e39214ed4532be34ec337f93c7eef3eaa6"
|
36 |
+
|
37 |
+
[[tool.edge-maxxing.models]]
|
38 |
+
repository = "TrendForge/extra2manQ2"
|
39 |
+
revision = "cef012d2db2f5a006567e797a0b9130aea5449c1"
|
40 |
|
41 |
[project.scripts]
|
42 |
start_inference = "main:main"
|
src/pipeline.py
CHANGED
@@ -1,38 +1,122 @@
|
|
1 |
-
#
|
2 |
-
|
3 |
-
from transformers import T5EncoderModel, T5TokenizerFast, CLIPTokenizer, CLIPTextModel
|
4 |
import torch
|
5 |
import torch._dynamo
|
6 |
import gc
|
7 |
-
import os
|
8 |
-
from diffusers import FluxPipeline, AutoencoderKL, AutoencoderTiny
|
9 |
from PIL.Image import Image
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
from pipelines.models import TextToImageRequest
|
|
|
|
|
11 |
from torch import Generator
|
12 |
from diffusers import FluxTransformer2DModel, DiffusionPipeline
|
13 |
-
from torchao.quantization import quantize_, int8_weight_only, fpx_weight_only
|
14 |
|
15 |
-
|
16 |
os.environ["TOKENIZERS_PARALLELISM"] = "True"
|
17 |
torch._dynamo.config.suppress_errors = True
|
18 |
|
|
|
|
|
|
|
|
|
19 |
Pipeline = None
|
20 |
-
|
21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
22 |
|
23 |
def load_pipeline() -> Pipeline:
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
pipeline.to("cuda")
|
34 |
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
return pipeline
|
37 |
|
38 |
@torch.no_grad()
|
@@ -47,5 +131,4 @@ def infer(request: TextToImageRequest, pipeline: Pipeline) -> Image:
|
|
47 |
max_sequence_length=256,
|
48 |
height=request.height,
|
49 |
width=request.width,
|
50 |
-
).images[0]
|
51 |
-
|
|
|
1 |
+
# Coding
|
2 |
+
import os
|
|
|
3 |
import torch
|
4 |
import torch._dynamo
|
5 |
import gc
|
|
|
|
|
6 |
from PIL.Image import Image
|
7 |
+
from torchao.quantization import quantize_, int8_weight_only, fpx_weight_only
|
8 |
+
from huggingface_hub.constants import HF_HUB_CACHE
|
9 |
+
from transformers import T5EncoderModel, T5TokenizerFast, CLIPTokenizer, CLIPTextModel
|
10 |
+
|
11 |
+
|
12 |
+
from PIL.Image import Image
|
13 |
+
from diffusers import FluxPipeline, AutoencoderKL, AutoencoderTiny
|
14 |
from pipelines.models import TextToImageRequest
|
15 |
+
|
16 |
+
from PIL.Image import Image
|
17 |
from torch import Generator
|
18 |
from diffusers import FluxTransformer2DModel, DiffusionPipeline
|
|
|
19 |
|
20 |
+
|
21 |
os.environ["TOKENIZERS_PARALLELISM"] = "True"
|
22 |
torch._dynamo.config.suppress_errors = True
|
23 |
|
24 |
+
os.environ['PYTORCH_CUDA_ALLOC_CONF']="expandable_segments:True"
|
25 |
+
|
26 |
+
|
27 |
+
|
28 |
Pipeline = None
|
29 |
+
CHECKPOINT = "black-forest-labs/FLUX.1-schnell"
|
30 |
+
REVISION = "741f7c3ce8b383c54771c7003378a50191e9efe9"
|
31 |
+
|
32 |
+
class QuantativeAnalysis:
    """Walk a model's trainable parameters and inspect their value range.

    The constructor stores ``model``, ``num_bins`` and ``scale_ratio``.
    ``num_bins`` and ``scale_ratio`` are kept as attributes only; ``apply``
    does not read them.
    """

    def __init__(self, model, num_bins=256, scale_ratio=1.0):
        """Store the model and the two settings as instance attributes.

        Args:
            model: Object exposing ``named_parameters()`` (e.g. a
                ``torch.nn.Module``).
            num_bins: Stored on the instance; unused by ``apply``.
            scale_ratio: Stored on the instance; unused by ``apply``.
        """
        self.model = model
        self.num_bins = num_bins
        self.scale_ratio = scale_ratio

    def apply(self):
        """Scan every trainable parameter and return the model unchanged.

        For each parameter with ``requires_grad`` set, the min, max and range
        are computed under ``torch.no_grad()``. Nothing is written back to
        the parameters.

        Returns:
            The same ``model`` object that was passed to the constructor.
        """
        with torch.no_grad():
            for _name, param in self.model.named_parameters():
                # min()/max() raise on zero-element tensors, so skip those
                # together with frozen parameters.
                if not param.requires_grad or param.numel() == 0:
                    continue
                param_min = param.min()
                param_max = param.max()
                # Previously this name was read without ever being assigned,
                # which raised NameError on the first trainable parameter.
                param_range = param_max - param_min
                if param_range > 0:
                    # NOTE(review): this blend of min and max is computed but
                    # never stored into the parameter, so the pass has no
                    # effect on the weights. Confirm whether a write-back
                    # was intended before relying on this class.
                    _blend = 0.8 * param_min + 0.2 * param_max
        return self.model
|
48 |
+
|
49 |
+
class AttentionQuant:
    """Quantize selected parameters of a model in place by uniform binning.

    ``att_config`` maps a layer key to a ``(num_bins, scale_factor)`` pair.
    A parameter is selected when the FIRST dot-separated component of its
    name (as yielded by ``named_parameters()``) is a key of ``att_config``.
    """

    def __init__(self, model, att_config):
        """Store the model and the per-layer quantization settings.

        Args:
            model: Object exposing ``named_parameters()`` (e.g. a
                ``torch.nn.Module``).
            att_config: Mapping of layer key -> ``(num_bins, scale_factor)``.
        """
        self.model = model
        self.att_config = att_config

    def apply(self):
        """Bin and rescale every selected trainable parameter in place.

        For a selected parameter the values are normalized to ``[0, 1]``,
        rounded onto ``num_bins`` evenly spaced levels, mapped back to the
        original ``[min, max]`` interval and multiplied by ``scale_factor``.
        A selected parameter whose values are all equal is set to zero.
        Frozen and zero-element parameters are left untouched.

        Returns:
            The same ``model`` object that was passed to the constructor.

        Raises:
            ValueError: If a selected layer is configured with fewer than
                two bins (the level spacing would divide by zero).
        """
        with torch.no_grad():
            for name, param in self.model.named_parameters():
                if not param.requires_grad:
                    continue

                # Only the leading name component is used as the lookup key.
                layer_name = name.split(".")[0]
                if layer_name not in self.att_config:
                    continue

                num_bins, scale_factor = self.att_config[layer_name]
                if num_bins < 2:
                    raise ValueError(
                        f"num_bins must be >= 2 for layer {layer_name!r}, "
                        f"got {num_bins}"
                    )

                # min()/max() raise on zero-element tensors.
                if param.numel() == 0:
                    continue

                # Normalize weights, apply binning, and rescale.
                param_min = param.min()
                param_max = param.max()
                param_range = param_max - param_min

                if param_range > 0:
                    levels = num_bins - 1
                    normalized = (param - param_min) / param_range
                    binned = torch.round(normalized * levels) / levels
                    rescaled = binned * param_range + param_min
                    param.data.copy_(rescaled * scale_factor)
                else:
                    # Constant tensor: no range to bin over.
                    param.data.zero_()

        return self.model
|
75 |
|
76 |
def load_pipeline() -> Pipeline:
    """Build the FLUX pipeline, move it to CUDA and warm it up.

    Steps, in order:
      1. Load the T5 text encoder from ``TrendForge/extra1manQ1``.
      2. Load the tiny VAE from ``TrendForge/extra2manQ2``.
      3. Load the transformer from the local Hugging Face cache snapshot of
         ``TrendForge/extra0manQ0`` (non-safetensors weights).
      4. Run ``AttentionQuant`` over the transformer, falling back to the
         loaded transformer if that step raises.
      5. Assemble a ``DiffusionPipeline`` from ``CHECKPOINT`` at ``REVISION``
         with the three components above, all in ``bfloat16``.
      6. Move the pipeline to ``"cuda"`` and run three 1024x1024, 4-step
         generations as warm-up.

    Returns:
        The assembled, warmed-up pipeline.
    """
    import logging

    text_encoder_2 = T5EncoderModel.from_pretrained(
        "TrendForge/extra1manQ1",
        revision="d302b6e39214ed4532be34ec337f93c7eef3eaa6",
        torch_dtype=torch.bfloat16,
    ).to(memory_format=torch.channels_last)

    base_vae = AutoencoderTiny.from_pretrained(
        "TrendForge/extra2manQ2",
        revision="cef012d2db2f5a006567e797a0b9130aea5449c1",
        torch_dtype=torch.bfloat16,
    )

    path = os.path.join(
        HF_HUB_CACHE,
        "models--TrendForge--extra0manQ0/snapshots/dc2cda167b8f53792a98020a3ef2f21808b09bb4",
    )
    base_trans = FluxTransformer2DModel.from_pretrained(
        path,
        torch_dtype=torch.bfloat16,
        use_safetensors=False,
    ).to(memory_format=torch.channels_last)

    att_config = {
        "transformer_blocks.15.attn.norm_added_k.weight": (64, 0.1),
        "transformer_blocks.15.attn.norm_added_q.weight": (64, 0.1),
        "transformer_blocks.15.attn.norm_added_v.weight": (64, 0.1),
    }
    # NOTE(review): these keys are full parameter names, whereas
    # AttentionQuant.apply looks up only the first dot-separated component of
    # each parameter name, so no parameter is selected by this config.
    # Confirm the intended key format before changing either side.
    try:
        # Previously the still-unassigned local `transformer` was passed
        # here, so this always raised and a bare `except:` hid the error.
        transformer = AttentionQuant(base_trans, att_config).apply()
    except Exception:
        # Best effort: keep serving with the transformer as loaded.
        logging.getLogger(__name__).exception(
            "AttentionQuant failed; using the transformer as loaded"
        )
        transformer = base_trans

    pipeline = DiffusionPipeline.from_pretrained(
        CHECKPOINT,
        revision=REVISION,
        vae=base_vae,
        transformer=transformer,
        text_encoder_2=text_encoder_2,
        torch_dtype=torch.bfloat16,
    )
    pipeline.to("cuda")

    # Warm-up generations; the outputs are discarded.
    for _warmup_batch in range(3):
        pipeline(
            prompt="forswearer, skullcap, Juglandales, bluelegs, cunila, carbro, Ammonites",
            width=1024,
            height=1024,
            guidance_scale=0.0,
            num_inference_steps=4,
            max_sequence_length=256,
        )
    return pipeline
|
121 |
|
122 |
@torch.no_grad()
|
|
|
131 |
max_sequence_length=256,
|
132 |
height=request.height,
|
133 |
width=request.width,
|
134 |
+
).images[0]
|
|