RealVis_v5.0_BF16_IP

Running on Zero

App Files Community

ford442 committed about 8 hours ago

Commit

2438e6b

verified ·

1 Parent(s): a2c6ec3

Update ip_adapter/ip_adapter.py

Browse files

Files changed (1) hide show

ip_adapter/ip_adapter.py +6 -6

ip_adapter/ip_adapter.py CHANGED Viewed

@@ -39,7 +39,7 @@ class IPAdapter:
         self.set_ip_adapter()
         # load image encoder
-        self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(self.image_encoder_path).to(self.device, dtype=torch.float32)
         self.clip_image_processor = CLIPImageProcessor()
         # image proj model
         self.image_proj_model = self.init_proj()
@@ -50,7 +50,7 @@ class IPAdapter:
             cross_attention_dim=self.pipe.unet.config.cross_attention_dim,
             clip_embeddings_dim=self.image_encoder.config.projection_dim,
             clip_extra_context_tokens=self.num_tokens,
-        ).to(self.device, dtype=torch.float32)
         return image_proj_model
     def set_ip_adapter(self):
@@ -70,7 +70,7 @@ class IPAdapter:
                 attn_procs[name] = AttnProcessor()
             else:
                 attn_procs[name] = IPAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim,
-                scale=1.0,num_tokens= self.num_tokens).to(self.device, dtype=torch.float32)
         unet.set_attn_processor(attn_procs)
         if hasattr(self.pipe, "controlnet"):
             if isinstance(self.pipe.controlnet, MultiControlNetModel):
@@ -108,7 +108,7 @@ class IPAdapter:
             pil_image = [pil_image]
         clip_image = self.clip_image_processor(images=pil_image, return_tensors="pt").pixel_values
         print('clip_image_processor shape:',clip_image.shape)
-        clip_image_embeds = self.image_encoder(clip_image.to(self.device, dtype=torch.float32)).image_embeds
         print('image_encoder shape:',clip_image_embeds.shape)
         image_prompt_embeds = self.image_proj_model(clip_image_embeds)
         print('image_proj_model shape:',image_prompt_embeds.shape)
@@ -317,7 +317,7 @@ class IPAdapterPlus(IPAdapter):
             embedding_dim=self.image_encoder.config.hidden_size,
             output_dim=self.pipe.unet.config.cross_attention_dim,
             ff_mult=4
-        ).to(self.device, dtype=torch.float32)
         return image_proj_model
     @torch.inference_mode()
@@ -325,7 +325,7 @@ class IPAdapterPlus(IPAdapter):
         if isinstance(pil_image, Image.Image):
             pil_image = [pil_image]
         clip_image = self.clip_image_processor(images=pil_image, return_tensors="pt").pixel_values
-        clip_image = clip_image.to(self.device, dtype=torch.float32)
         clip_image_embeds = self.image_encoder(clip_image, output_hidden_states=True).hidden_states[-2]
         image_prompt_embeds = self.image_proj_model(clip_image_embeds)
         uncond_clip_image_embeds = self.image_encoder(torch.zeros_like(clip_image), output_hidden_states=True).hidden_states[-2]

         self.set_ip_adapter()
         # load image encoder
+        self.image_encoder = CLIPVisionModelWithProjection.from_pretrained(self.image_encoder_path).to(self.device, dtype=torch.bfloat16)
         self.clip_image_processor = CLIPImageProcessor()
         # image proj model
         self.image_proj_model = self.init_proj()
             cross_attention_dim=self.pipe.unet.config.cross_attention_dim,
             clip_embeddings_dim=self.image_encoder.config.projection_dim,
             clip_extra_context_tokens=self.num_tokens,
+        ).to(self.device, dtype=torch.bfloat16)
         return image_proj_model
     def set_ip_adapter(self):
                 attn_procs[name] = AttnProcessor()
             else:
                 attn_procs[name] = IPAttnProcessor(hidden_size=hidden_size, cross_attention_dim=cross_attention_dim,
+                scale=1.0,num_tokens= self.num_tokens).to(self.device, dtype=torch.bfloat16)
         unet.set_attn_processor(attn_procs)
         if hasattr(self.pipe, "controlnet"):
             if isinstance(self.pipe.controlnet, MultiControlNetModel):
             pil_image = [pil_image]
         clip_image = self.clip_image_processor(images=pil_image, return_tensors="pt").pixel_values
         print('clip_image_processor shape:',clip_image.shape)
+        clip_image_embeds = self.image_encoder(clip_image.to(self.device, dtype=torch.bfloat16)).image_embeds
         print('image_encoder shape:',clip_image_embeds.shape)
         image_prompt_embeds = self.image_proj_model(clip_image_embeds)
         print('image_proj_model shape:',image_prompt_embeds.shape)
             embedding_dim=self.image_encoder.config.hidden_size,
             output_dim=self.pipe.unet.config.cross_attention_dim,
             ff_mult=4
+        ).to(self.device, dtype=torch.bfloat16)
         return image_proj_model
     @torch.inference_mode()
         if isinstance(pil_image, Image.Image):
             pil_image = [pil_image]
         clip_image = self.clip_image_processor(images=pil_image, return_tensors="pt").pixel_values
+        clip_image = clip_image.to(self.device, dtype=torch.bfloat16)
         clip_image_embeds = self.image_encoder(clip_image, output_hidden_states=True).hidden_states[-2]
         image_prompt_embeds = self.image_proj_model(clip_image_embeds)
         uncond_clip_image_embeds = self.image_encoder(torch.zeros_like(clip_image), output_hidden_states=True).hidden_states[-2]