Spaces:
Sleeping
Sleeping
gaparmar
committed on
Commit
·
13ed5cd
1
Parent(s):
d3864b2
sketch demo
Browse files
- app.py +4 -8
- src/pix2pix_turbo.py +37 -24
app.py
CHANGED
@@ -1,7 +1,3 @@
|
|
1 |
-
"""
|
2 |
-
3.43.1
|
3 |
-
"""
|
4 |
-
|
5 |
import os
|
6 |
import sys
|
7 |
import pdb
|
@@ -78,7 +74,8 @@ def run(image, prompt, prompt_template, style_name, seed, val_r):
|
|
78 |
print("sketch updated")
|
79 |
if image is None:
|
80 |
ones = Image.new("L", (512, 512), 255)
|
81 |
-
|
|
|
82 |
prompt = prompt_template.replace("{prompt}", prompt)
|
83 |
image = image.convert("RGB")
|
84 |
image_t = TF.to_tensor(image) > 0.5
|
@@ -234,8 +231,8 @@ with gr.Blocks(css="style.css") as demo:
|
|
234 |
<div class="pad2"> <button href="TODO" download="image" id="my-button-down" onclick='return theSketchDownloadFunction()'></button> </div>
|
235 |
</div>
|
236 |
""")
|
237 |
-
gr.Markdown("## Prompt", elem_id="tools_header")
|
238 |
-
prompt = gr.Textbox(label=
|
239 |
with gr.Row():
|
240 |
style = gr.Dropdown(label="Style", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME, scale=1)
|
241 |
prompt_temp = gr.Textbox(label="Prompt Style Template", value=styles[DEFAULT_STYLE_NAME], scale=2, max_lines=1)
|
@@ -269,4 +266,3 @@ with gr.Blocks(css="style.css") as demo:
|
|
269 |
|
270 |
if __name__ == "__main__":
|
271 |
demo.queue().launch(debug=True)
|
272 |
-
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import sys
|
3 |
import pdb
|
|
|
74 |
print("sketch updated")
|
75 |
if image is None:
|
76 |
ones = Image.new("L", (512, 512), 255)
|
77 |
+
temp_uri = pil_image_to_data_uri(ones)
|
78 |
+
return ones, gr.update(link=temp_uri), gr.update(link=temp_uri)
|
79 |
prompt = prompt_template.replace("{prompt}", prompt)
|
80 |
image = image.convert("RGB")
|
81 |
image_t = TF.to_tensor(image) > 0.5
|
|
|
231 |
<div class="pad2"> <button href="TODO" download="image" id="my-button-down" onclick='return theSketchDownloadFunction()'></button> </div>
|
232 |
</div>
|
233 |
""")
|
234 |
+
# gr.Markdown("## Prompt", elem_id="tools_header")
|
235 |
+
prompt = gr.Textbox(label="Prompt", value="", show_label=True)
|
236 |
with gr.Row():
|
237 |
style = gr.Dropdown(label="Style", choices=STYLE_NAMES, value=DEFAULT_STYLE_NAME, scale=1)
|
238 |
prompt_temp = gr.Textbox(label="Prompt Style Template", value=styles[DEFAULT_STYLE_NAME], scale=2, max_lines=1)
|
|
|
266 |
|
267 |
if __name__ == "__main__":
|
268 |
demo.queue().launch(debug=True)
|
|
src/pix2pix_turbo.py
CHANGED
@@ -1,4 +1,6 @@
|
|
1 |
-
import os
|
|
|
|
|
2 |
import pdb
|
3 |
import copy
|
4 |
from tqdm import tqdm
|
@@ -7,11 +9,13 @@ from transformers import AutoTokenizer, PretrainedConfig, CLIPTextModel
|
|
7 |
from diffusers import AutoencoderKL, UNet2DConditionModel, DDPMScheduler
|
8 |
from diffusers.utils.peft_utils import set_weights_and_activate_adapters
|
9 |
from peft import LoraConfig
|
10 |
-
|
|
|
|
|
11 |
|
12 |
|
|
|
13 |
def my_vae_encoder_fwd(self, sample):
|
14 |
-
r"""The forward method of the `Encoder` class."""
|
15 |
sample = self.conv_in(sample)
|
16 |
l_blocks = []
|
17 |
# down
|
@@ -27,6 +31,7 @@ def my_vae_encoder_fwd(self, sample):
|
|
27 |
return sample
|
28 |
|
29 |
|
|
|
30 |
def my_vae_decoder_fwd(self,sample, latent_embeds = None):
|
31 |
sample = self.conv_in(sample)
|
32 |
upscale_dtype = next(iter(self.up_blocks.parameters())).dtype
|
@@ -76,21 +81,33 @@ class Pix2Pix_Turbo(torch.nn.Module):
|
|
76 |
vae = AutoencoderKL.from_pretrained("stabilityai/sd-turbo", subfolder="vae")
|
77 |
unet = UNet2DConditionModel.from_pretrained("stabilityai/sd-turbo", subfolder="unet")
|
78 |
|
79 |
-
if name=="
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
|
89 |
if name=="sketch_to_image_stochastic":
|
90 |
# download from url
|
91 |
-
url = "https://www.cs.cmu.edu/~
|
92 |
os.makedirs(ckpt_folder, exist_ok=True)
|
93 |
-
outf = os.path.join(ckpt_folder, "
|
94 |
if not os.path.exists(outf):
|
95 |
print(f"Downloading checkpoint to {outf}")
|
96 |
response = requests.get(url, stream=True)
|
@@ -105,7 +122,6 @@ class Pix2Pix_Turbo(torch.nn.Module):
|
|
105 |
if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
|
106 |
print("ERROR, something went wrong")
|
107 |
print(f"Downloaded successfully to {outf}")
|
108 |
-
# p_ckpt = "/home/gparmar/code/img2img-turbo/single_step_translation/notebooks/DEMO/sketch_to_image_stochastic.pkl"
|
109 |
p_ckpt = outf
|
110 |
sd = torch.load(p_ckpt, map_location="cpu")
|
111 |
unet_lora_config = LoraConfig(r=sd["rank_unet"], init_lora_weights="gaussian", target_modules=sd["unet_lora_target_modules"])
|
@@ -123,15 +139,17 @@ class Pix2Pix_Turbo(torch.nn.Module):
|
|
123 |
vae.decoder.ignore_skip = False
|
124 |
vae.add_adapter(vae_lora_config, adapter_name="vae_skip")
|
125 |
unet.add_adapter(unet_lora_config)
|
126 |
-
unet.
|
|
|
|
|
127 |
unet.enable_xformers_memory_efficient_attention()
|
128 |
-
|
129 |
-
|
|
|
130 |
unet.to("cuda")
|
131 |
vae.to("cuda")
|
132 |
unet.eval()
|
133 |
vae.eval()
|
134 |
-
|
135 |
self.unet, self.vae = unet, vae
|
136 |
self.timesteps = torch.tensor([999], device="cuda").long()
|
137 |
|
@@ -141,7 +159,6 @@ class Pix2Pix_Turbo(torch.nn.Module):
|
|
141 |
caption_tokens = self.tokenizer(prompt, max_length=self.tokenizer.model_max_length,
|
142 |
padding="max_length", truncation=True, return_tensors="pt").input_ids.cuda()
|
143 |
caption_enc = self.text_encoder(caption_tokens)[0]
|
144 |
-
|
145 |
if deterministic:
|
146 |
encoded_control = self.vae.encode(c_t).latent_dist.sample()*self.vae.config.scaling_factor
|
147 |
model_pred = self.unet(encoded_control, self.timesteps, encoder_hidden_states=caption_enc,).sample
|
@@ -161,8 +178,4 @@ class Pix2Pix_Turbo(torch.nn.Module):
|
|
161 |
x_denoised = self.sched.step(unet_output, self.timesteps, unet_input, return_dict=True).prev_sample
|
162 |
self.vae.decoder.incoming_skip_acts = self.vae.encoder.current_down_blocks
|
163 |
output_image = (self.vae.decode(x_denoised / self.vae.config.scaling_factor ).sample).clamp(-1,1)
|
164 |
-
|
165 |
return output_image
|
166 |
-
|
167 |
-
|
168 |
-
|
|
|
1 |
+
import os
|
2 |
+
import requests
|
3 |
+
import sys
|
4 |
import pdb
|
5 |
import copy
|
6 |
from tqdm import tqdm
|
|
|
9 |
from diffusers import AutoencoderKL, UNet2DConditionModel, DDPMScheduler
|
10 |
from diffusers.utils.peft_utils import set_weights_and_activate_adapters
|
11 |
from peft import LoraConfig
|
12 |
+
p = "src/"
|
13 |
+
sys.path.append(p)
|
14 |
+
from model import make_1step_sched
|
15 |
|
16 |
|
17 |
+
"""The forward method of the `Encoder` class."""
|
18 |
def my_vae_encoder_fwd(self, sample):
|
|
|
19 |
sample = self.conv_in(sample)
|
20 |
l_blocks = []
|
21 |
# down
|
|
|
31 |
return sample
|
32 |
|
33 |
|
34 |
+
"""The forward method of the `Decoder` class."""
|
35 |
def my_vae_decoder_fwd(self,sample, latent_embeds = None):
|
36 |
sample = self.conv_in(sample)
|
37 |
upscale_dtype = next(iter(self.up_blocks.parameters())).dtype
|
|
|
81 |
vae = AutoencoderKL.from_pretrained("stabilityai/sd-turbo", subfolder="vae")
|
82 |
unet = UNet2DConditionModel.from_pretrained("stabilityai/sd-turbo", subfolder="unet")
|
83 |
|
84 |
+
if name=="edge_to_image":
|
85 |
+
url = "https://www.cs.cmu.edu/~img2img-turbo/models/edge_to_image_loras.pkl"
|
86 |
+
os.makedirs(ckpt_folder, exist_ok=True)
|
87 |
+
outf = os.path.join(ckpt_folder, "edge_to_image_loras.pkl")
|
88 |
+
if not os.path.exists(outf):
|
89 |
+
print(f"Downloading checkpoint to {outf}")
|
90 |
+
response = requests.get(url, stream=True)
|
91 |
+
total_size_in_bytes= int(response.headers.get('content-length', 0))
|
92 |
+
block_size = 1024 # 1 Kibibyte
|
93 |
+
progress_bar = tqdm(total=total_size_in_bytes, unit='iB', unit_scale=True)
|
94 |
+
with open(outf, 'wb') as file:
|
95 |
+
for data in response.iter_content(block_size):
|
96 |
+
progress_bar.update(len(data))
|
97 |
+
file.write(data)
|
98 |
+
progress_bar.close()
|
99 |
+
if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
|
100 |
+
print("ERROR, something went wrong")
|
101 |
+
print(f"Downloaded successfully to {outf}")
|
102 |
+
p_ckpt = outf
|
103 |
+
sd = torch.load(p_ckpt, map_location="cpu")
|
104 |
+
unet_lora_config = LoraConfig(r=sd["rank_unet"], init_lora_weights="gaussian", target_modules=sd["unet_lora_target_modules"])
|
105 |
|
106 |
if name=="sketch_to_image_stochastic":
|
107 |
# download from url
|
108 |
+
url = "https://www.cs.cmu.edu/~img2img-turbo/models/sketch_to_image_stochastic_lora.pkl"
|
109 |
os.makedirs(ckpt_folder, exist_ok=True)
|
110 |
+
outf = os.path.join(ckpt_folder, "sketch_to_image_stochastic_lora.pkl")
|
111 |
if not os.path.exists(outf):
|
112 |
print(f"Downloading checkpoint to {outf}")
|
113 |
response = requests.get(url, stream=True)
|
|
|
122 |
if total_size_in_bytes != 0 and progress_bar.n != total_size_in_bytes:
|
123 |
print("ERROR, something went wrong")
|
124 |
print(f"Downloaded successfully to {outf}")
|
|
|
125 |
p_ckpt = outf
|
126 |
sd = torch.load(p_ckpt, map_location="cpu")
|
127 |
unet_lora_config = LoraConfig(r=sd["rank_unet"], init_lora_weights="gaussian", target_modules=sd["unet_lora_target_modules"])
|
|
|
139 |
vae.decoder.ignore_skip = False
|
140 |
vae.add_adapter(vae_lora_config, adapter_name="vae_skip")
|
141 |
unet.add_adapter(unet_lora_config)
|
142 |
+
_sd_unet = unet.state_dict()
|
143 |
+
for k in sd["state_dict_unet"]: _sd_unet[k] = sd["state_dict_unet"][k]
|
144 |
+
unet.load_state_dict(_sd_unet)
|
145 |
unet.enable_xformers_memory_efficient_attention()
|
146 |
+
_sd_vae = vae.state_dict()
|
147 |
+
for k in sd["state_dict_vae"]: _sd_vae[k] = sd["state_dict_vae"][k]
|
148 |
+
vae.load_state_dict(_sd_vae)
|
149 |
unet.to("cuda")
|
150 |
vae.to("cuda")
|
151 |
unet.eval()
|
152 |
vae.eval()
|
|
|
153 |
self.unet, self.vae = unet, vae
|
154 |
self.timesteps = torch.tensor([999], device="cuda").long()
|
155 |
|
|
|
159 |
caption_tokens = self.tokenizer(prompt, max_length=self.tokenizer.model_max_length,
|
160 |
padding="max_length", truncation=True, return_tensors="pt").input_ids.cuda()
|
161 |
caption_enc = self.text_encoder(caption_tokens)[0]
|
|
|
162 |
if deterministic:
|
163 |
encoded_control = self.vae.encode(c_t).latent_dist.sample()*self.vae.config.scaling_factor
|
164 |
model_pred = self.unet(encoded_control, self.timesteps, encoder_hidden_states=caption_enc,).sample
|
|
|
178 |
x_denoised = self.sched.step(unet_output, self.timesteps, unet_input, return_dict=True).prev_sample
|
179 |
self.vae.decoder.incoming_skip_acts = self.vae.encoder.current_down_blocks
|
180 |
output_image = (self.vae.decode(x_denoised / self.vae.config.scaling_factor ).sample).clamp(-1,1)
|
|
|
181 |
return output_image
|
|
|
|
|
|