Nuullll committed on
Commit
0958e65
·
1 Parent(s): ca39fe5

[IPEX] Support xpu for Intel Arc GPU

Browse files

An Arc A770 16 GB can render at ~3 fps (fp16).

Files changed (2) hide show
  1. app-img2img.py +7 -2
  2. app-txt2img.py +7 -2
app-img2img.py CHANGED
@@ -12,6 +12,10 @@ from fastapi.staticfiles import StaticFiles
12
  from diffusers import DiffusionPipeline, AutoencoderTiny
13
  from compel import Compel
14
  import torch
 
 
 
 
15
  from PIL import Image
16
  import numpy as np
17
  import gradio as gr
@@ -31,7 +35,8 @@ USE_TINY_AUTOENCODER=True
31
 
32
  # check if MPS is available OSX only M1/M2/M3 chips
33
  mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
34
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
35
  torch_device = device
36
 
37
  # change to torch.float16 to save GPU memory
@@ -72,7 +77,7 @@ pipe.unet.to(memory_format=torch.channels_last)
72
  if psutil.virtual_memory().total < 64 * 1024**3:
73
  pipe.enable_attention_slicing()
74
 
75
- if not mps_available:
76
  pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
77
  pipe(prompt="warmup", image=[Image.new("RGB", (512, 512))])
78
 
 
12
  from diffusers import DiffusionPipeline, AutoencoderTiny
13
  from compel import Compel
14
  import torch
15
+ try:
16
+ import intel_extension_for_pytorch as ipex
17
+ except:
18
+ pass
19
  from PIL import Image
20
  import numpy as np
21
  import gradio as gr
 
35
 
36
  # check if MPS is available OSX only M1/M2/M3 chips
37
  mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
38
+ xpu_available = hasattr(torch, 'xpu') and torch.xpu.is_available()
39
+ device = torch.device("cuda" if torch.cuda.is_available() else "xpu" if xpu_available else "cpu")
40
  torch_device = device
41
 
42
  # change to torch.float16 to save GPU memory
 
77
  if psutil.virtual_memory().total < 64 * 1024**3:
78
  pipe.enable_attention_slicing()
79
 
80
+ if not mps_available and not xpu_available:
81
  pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
82
  pipe(prompt="warmup", image=[Image.new("RGB", (512, 512))])
83
 
app-txt2img.py CHANGED
@@ -12,6 +12,10 @@ from fastapi.staticfiles import StaticFiles
12
  from diffusers import DiffusionPipeline, AutoencoderTiny
13
  from compel import Compel
14
  import torch
 
 
 
 
15
  from PIL import Image
16
  import numpy as np
17
  import gradio as gr
@@ -32,7 +36,8 @@ USE_TINY_AUTOENCODER=True
32
 
33
  # check if MPS is available OSX only M1/M2/M3 chips
34
  mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
35
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
36
  torch_device = device
37
  # change to torch.float16 to save GPU memory
38
  torch_dtype = torch.float32
@@ -72,7 +77,7 @@ pipe.unet.to(memory_format=torch.channels_last)
72
  if psutil.virtual_memory().total < 64 * 1024**3:
73
  pipe.enable_attention_slicing()
74
 
75
- if not mps_available:
76
  pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
77
  pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
78
 
 
12
  from diffusers import DiffusionPipeline, AutoencoderTiny
13
  from compel import Compel
14
  import torch
15
+ try:
16
+ import intel_extension_for_pytorch as ipex
17
+ except:
18
+ pass
19
  from PIL import Image
20
  import numpy as np
21
  import gradio as gr
 
36
 
37
  # check if MPS is available OSX only M1/M2/M3 chips
38
  mps_available = hasattr(torch.backends, "mps") and torch.backends.mps.is_available()
39
+ xpu_available = hasattr(torch, 'xpu') and torch.xpu.is_available()
40
+ device = torch.device("cuda" if torch.cuda.is_available() else "xpu" if xpu_available else "cpu")
41
  torch_device = device
42
  # change to torch.float16 to save GPU memory
43
  torch_dtype = torch.float32
 
77
  if psutil.virtual_memory().total < 64 * 1024**3:
78
  pipe.enable_attention_slicing()
79
 
80
+ if not mps_available and not xpu_available:
81
  pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
82
  pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
83