Spaces:

NightRaven109
/

TextureUpscaleBeta

Running on Zero

App Files Files Community

NightRaven109 committed on Dec 22, 2024

Commit

a495ef9

verified ·

1 Parent(s): eeb4ef5

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -61

app.py CHANGED Viewed

@@ -24,6 +24,73 @@ generator = None
 accelerator = None
 model_path = None
 @spaces.GPU
 def initialize_models():
     global pipeline, generator, accelerator, model_path
@@ -41,62 +108,8 @@ def initialize_models():
             token=os.environ['Read2']
         )
-        # Load models from local directory
-        scheduler = DDPMScheduler.from_pretrained(
-            os.path.join(model_path, "stable-diffusion-2-1-base/scheduler")
-        )
-        text_encoder = CLIPTextModel.from_pretrained(
-            os.path.join(model_path, "stable-diffusion-2-1-base/text_encoder")
-        )
-        tokenizer = CLIPTokenizer.from_pretrained(
-            os.path.join(model_path, "stable-diffusion-2-1-base/tokenizer")
-        )
-        feature_extractor = CLIPImageProcessor.from_pretrained(
-            os.path.join(model_path, "stable-diffusion-2-1-base/feature_extractor")
-        )
-        unet = UNet2DConditionModel.from_pretrained(
-            os.path.join(model_path, "stable-diffusion-2-1-base/unet")
-        )
-        controlnet = ControlNetModel.from_pretrained(
-            os.path.join(model_path, "Controlnet")
-        )
-        vae = AutoencoderKL.from_pretrained(
-            os.path.join(model_path, "vae")
-        )
-        # Freeze models
-        for model in [vae, text_encoder, unet, controlnet]:
-            model.requires_grad_(False)
-        # Initialize pipeline
-        pipeline = StableDiffusionControlNetPipeline(
-            vae=vae,
-            text_encoder=text_encoder,
-            tokenizer=tokenizer,
-            feature_extractor=feature_extractor,
-            unet=unet,
-            controlnet=controlnet,
-            scheduler=scheduler,
-            safety_checker=None,
-            requires_safety_checker=False,
-        )
-        # Get weight dtype based on mixed precision
-        weight_dtype = torch.float32
-        if accelerator.mixed_precision == "fp16":
-            weight_dtype = torch.float16
-        elif accelerator.mixed_precision == "bf16":
-            weight_dtype = torch.bfloat16
-        # Move models to device with appropriate dtype
-        for model in [text_encoder, vae, unet, controlnet]:
-            model.to(accelerator.device, dtype=weight_dtype)
         # Initialize generator
         generator = torch.Generator(device=accelerator.device)
@@ -149,6 +162,8 @@ def process_image(
                 t_max=0.6666,
                 t_min=0.0,
                 tile_diffusion=False,
                 added_prompt=prompt,
                 image=input_pil,
                 num_inference_steps=num_inference_steps,
@@ -158,6 +173,9 @@ def process_image(
                 guidance_scale=guidance_scale,
                 negative_prompt=negative_prompt,
                 conditioning_scale=conditioning_scale,
             )
         generated_image = output.images[0]
@@ -193,11 +211,7 @@ iface = gr.Interface(
     ],
     outputs=gr.Image(label="Generated Image"),
     title="Controllable Conditional Super-Resolution",
-    description="Upload an image to enhance its resolution using CCSR.",
-    examples=[
-        ["example1.jpg", "clean, sharp, detailed", "blurry, noise", 1.0, 1.0, 20, 42, 2, "adain"],
-        ["example2.jpg", "high-resolution, pristine", "artifacts, pixelated", 1.5, 1.0, 30, 123, 2, "wavelet"],
-    ]
 )
 if __name__ == "__main__":

 accelerator = None
 model_path = None
+def load_pipeline(accelerator, model_path):  # Build and return the ControlNet pipeline from components stored under model_path.
+    # Load the DDPM scheduler from the stable-diffusion-2-1-base/scheduler subfolder of model_path
+    scheduler = DDPMScheduler.from_pretrained(
+        model_path,
+        subfolder="stable-diffusion-2-1-base/scheduler"
+    )
+    # Load the remaining components (text encoder, tokenizer, feature extractor, UNet, ControlNet, VAE)
+    text_encoder = CLIPTextModel.from_pretrained(
+        model_path,
+        subfolder="stable-diffusion-2-1-base/text_encoder"
+    )
+    tokenizer = CLIPTokenizer.from_pretrained(
+        model_path,
+        subfolder="stable-diffusion-2-1-base/tokenizer"
+    )
+    feature_extractor = CLIPImageProcessor.from_pretrained(  # NOTE(review): only component addressed via os.path.join instead of subfolder= — presumably assumes model_path is a local directory; confirm
+        os.path.join(model_path, "stable-diffusion-2-1-base/feature_extractor")
+    )
+    unet = UNet2DConditionModel.from_pretrained(
+        model_path,
+        subfolder="stable-diffusion-2-1-base/unet"
+    )
+    controlnet = ControlNetModel.from_pretrained(
+        model_path,
+        subfolder="Controlnet"
+    )
+    vae = AutoencoderKL.from_pretrained(
+        model_path,
+        subfolder="vae"
+    )
+    # Freeze models: turn off gradient tracking on the VAE, text encoder, UNet and ControlNet
+    for model in [vae, text_encoder, unet, controlnet]:
+        model.requires_grad_(False)
+    # Initialize pipeline from the loaded components, with the safety checker disabled
+    pipeline = StableDiffusionControlNetPipeline(
+        vae=vae,
+        text_encoder=text_encoder,
+        tokenizer=tokenizer,
+        feature_extractor=feature_extractor,
+        unet=unet,
+        controlnet=controlnet,
+        scheduler=scheduler,
+        safety_checker=None,
+        requires_safety_checker=False,
+    )
+    # Set weight dtype from accelerator.mixed_precision: float32 unless it is "fp16" or "bf16"
+    weight_dtype = torch.float32
+    if accelerator.mixed_precision == "fp16":
+        weight_dtype = torch.float16
+    elif accelerator.mixed_precision == "bf16":
+        weight_dtype = torch.bfloat16
+    # Move each network to accelerator.device with that dtype (tokenizer, feature extractor and scheduler are not moved)
+    for model in [text_encoder, vae, unet, controlnet]:
+        model.to(accelerator.device, dtype=weight_dtype)
+    return pipeline  # the StableDiffusionControlNetPipeline instance assembled above
 @spaces.GPU
 def initialize_models():
     global pipeline, generator, accelerator, model_path
             token=os.environ['Read2']
         )
+        # Load pipeline using the original loading function
+        pipeline = load_pipeline(accelerator, model_path)
         # Initialize generator
         generator = torch.Generator(device=accelerator.device)
                 t_max=0.6666,
                 t_min=0.0,
                 tile_diffusion=False,
+                tile_diffusion_size=512,
+                tile_diffusion_stride=256,
                 added_prompt=prompt,
                 image=input_pil,
                 num_inference_steps=num_inference_steps,
                 guidance_scale=guidance_scale,
                 negative_prompt=negative_prompt,
                 conditioning_scale=conditioning_scale,
+                start_steps=999,
+                start_point='lr',
+                use_vae_encode_condition=False
             )
         generated_image = output.images[0]
     ],
     outputs=gr.Image(label="Generated Image"),
     title="Controllable Conditional Super-Resolution",
+    description="Upload an image to enhance its resolution using CCSR."
 )
 if __name__ == "__main__":