Spaces:
Runtime error
Runtime error
yoonhachoe
committed on
Commit
•
8d763c6
1
Parent(s):
fc9f69f
initial commit
Browse files- app.py +309 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,309 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import torch
|
3 |
+
import cv2
|
4 |
+
|
5 |
+
### CAM explainer code from Intel XAI tools (https://github.com/IntelAI/intel-xai-tools) ###
|
6 |
+
class XGradCAM:
    """XGradCAM explainer for an image classification model.

    Adapted from the CAM explainer in Intel XAI tools.

    Args:
        model: PyTorch model to explain. Every parameter of the model is
            switched to ``requires_grad=True`` by the constructor.
        targetLayer: Layer of ``model`` to compute the CAM for.
        targetClass: Integer index of the category to explain, or ``None``
            to pass no explicit target to the CAM library.
        image: Input image as an array accepted by ``cv2.resize``
            (2-D grayscale or 3-channel colour).
        dims: Size passed to ``cv2.resize`` before inference.
        device: Device the model and the input tensor are moved to.
    """

    def __init__(self, model, targetLayer, targetClass, image, dims, device):
        # Set any frozen layers to trainable; the CAM cannot be calculated
        # without gradients for them.
        for param in model.parameters():
            if not param.requires_grad:
                param.requires_grad = True

        self.model = model
        self.targetLayer = targetLayer
        self.targetClass = targetClass
        self.image = image
        self.dims = dims
        self.device = device

    def visualize(self):
        """Compute the XGradCAM heat map and overlay it on the resized image.

        Returns:
            The overlay produced by ``show_cam_on_image(..., use_rgb=True)``.
        """
        # Imported lazily, and aliased so the library class does not shadow
        # this wrapper class of the same name.
        from pytorch_grad_cam import XGradCAM as _XGradCAM
        from pytorch_grad_cam.utils.image import preprocess_image, show_cam_on_image
        from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
        import cv2
        import numpy as np

        self.model.eval().to(self.device)

        image = cv2.resize(self.image, self.dims)
        # A 2-D array is a grayscale image: expand it to RGB for the overlay
        # and remember to feed the model a single channel again below.
        is_grayscale = len(image.shape) == 2
        if is_grayscale:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        rgb_img = np.float32(image) / 255
        input_tensor = preprocess_image(rgb_img,
                                        mean=[0.485, 0.456, 0.406],
                                        std=[0.229, 0.224, 0.225])
        input_tensor = input_tensor.to(self.device)
        if is_grayscale:
            # Convert back to one channel if that was the initial dimension.
            input_tensor = input_tensor[:, 0:1, :, :]

        if self.targetClass is None:
            targets = None
        else:
            targets = [ClassifierOutputTarget(self.targetClass)]

        # Build the layer list locally instead of rebinding self.targetLayer,
        # so calling visualize() more than once does not nest lists.
        # ``use_cuda`` is deliberately not passed: the model and the input
        # are already on ``self.device``, and recent grad-cam releases no
        # longer accept that keyword.
        cam = _XGradCAM(self.model, [self.targetLayer])
        grayscale_cam = cam(input_tensor=input_tensor, targets=targets,
                            aug_smooth=False, eigen_smooth=False)
        grayscale_cam = grayscale_cam[0, :]

        # The overlay is returned as produced. The previous implementation
        # applied RGB2BGR twice (a no-op) and also computed guided
        # backpropagation images that were never returned.
        cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)

        print("XGradCAM is generated. ")

        return cam_image
|
74 |
+
|
75 |
+
class EigenCAM:
    """EigenCAM explainer for an object detection model.

    Adapted from the CAM explainer in Intel XAI tools.

    Args:
        model: PyTorch detection model to explain.
        targetLayer: Layer of ``model`` to compute the CAM for.
        boxes: Iterable of ``(x1, y1, x2, y2)`` integer pixel coordinates of
            the detected objects.
        classes: Label text drawn for each box, indexed like ``boxes``.
        colors: Drawing colour for each box, indexed like ``boxes``.
        reshape: Optional callable passed to the CAM library as
            ``reshape_transform``; ``None`` to use the library default.
        image: Input image array with values on a 0-255 scale.
        device: Device the model and the input tensor are moved to.
    """

    def __init__(self, model, targetLayer, boxes, classes, colors, reshape, image, device):
        self.model = model
        self.targetLayer = targetLayer
        self.boxes = boxes
        self.classes = classes
        self.colors = colors
        self.reshape = reshape
        self.image = image
        self.device = device

    def visualize(self):
        """Compute the EigenCAM, renormalize it per box, and draw the boxes.

        Returns:
            The overlay from ``show_cam_on_image(..., use_rgb=True)`` with a
            rectangle and a class label drawn for every box.
        """
        # Imported lazily, and aliased so the library class does not shadow
        # this wrapper class of the same name.
        from pytorch_grad_cam import EigenCAM as _EigenCAM
        from pytorch_grad_cam.utils.image import show_cam_on_image, scale_cam_image
        import torchvision
        import cv2
        import numpy as np

        self.model.eval().to(self.device)

        rgb_img = np.float32(self.image) / 255
        transform = torchvision.transforms.ToTensor()
        input_tensor = transform(rgb_img).unsqueeze(0).to(self.device)

        # Build the constructor arguments locally instead of rebinding
        # self.targetLayer, so repeated visualize() calls do not nest lists.
        # ``use_cuda`` is deliberately not passed: the model and the input
        # are already on ``self.device``, and recent grad-cam releases no
        # longer accept that keyword.
        cam_kwargs = {}
        if self.reshape is not None:
            cam_kwargs["reshape_transform"] = self.reshape
        cam = _EigenCAM(self.model, [self.targetLayer], **cam_kwargs)

        grayscale_cam = cam(input_tensor=input_tensor, targets=[],
                            aug_smooth=False, eigen_smooth=False)
        grayscale_cam = grayscale_cam[0, :]

        # Rescale the CAM inside each bounding box independently, leaving
        # everything outside the boxes at zero.
        renormalized_cam = np.zeros(grayscale_cam.shape, dtype=np.float32)
        for x1, y1, x2, y2 in self.boxes:
            region = grayscale_cam[y1:y2, x1:x2]
            if region.size == 0:
                # A zero-area box has nothing to rescale and would make the
                # scaling helper fail on an empty array.
                continue
            renormalized_cam[y1:y2, x1:x2] = scale_cam_image(region.copy())
        renormalized_cam = scale_cam_image(renormalized_cam)
        eigencam_image_renormalized = show_cam_on_image(rgb_img, renormalized_cam, use_rgb=True)

        for i, box in enumerate(self.boxes):
            color = self.colors[i]
            cv2.rectangle(
                eigencam_image_renormalized,
                (box[0], box[1]),
                (box[2], box[3]),
                color, 2
            )
            # Place the label slightly above the top-left corner of the box.
            cv2.putText(eigencam_image_renormalized, self.classes[i], (box[0], box[1] - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2,
                        lineType=cv2.LINE_AA)

        print("EigenCAM is generated. ")

        return eigencam_image_renormalized
|
135 |
+
|
136 |
+
### For Gradio Demo ###
|
137 |
+
def xgradcam(image, model_code, target_class):
    """Gradio callback that renders an XGradCAM overlay for ``image``.

    Args:
        image: Input image array supplied by the Gradio image component.
        model_code: Python source that must define ``model`` and
            ``target_layer``.
        target_class: Category index as text; blank means no explicit target.

    Returns:
        The image returned by ``XGradCAM.visualize()``.

    Raises:
        gr.Error: If no image was provided, the snippet does not define the
            required names, or ``target_class`` is not an integer.
    """
    if image is None:
        raise gr.Error("Please provide an input image.")

    # SECURITY: exec() runs arbitrary user-supplied code with the privileges
    # of this process. That is the purpose of this demo, but it must not be
    # exposed to untrusted users without sandboxing.
    # The snippet runs in a per-call copy of the module namespace so it can
    # still use names such as ``torch``, while its results cannot leak into
    # other requests or the other tab. Required names are removed first so a
    # stale value is never picked up silently.
    namespace = dict(globals())
    required = ("model", "target_layer")
    for name in required:
        namespace.pop(name, None)
    exec(model_code, namespace)

    missing = [name for name in required if name not in namespace]
    if missing:
        raise gr.Error("The model code must define: " + ", ".join(missing))

    target_text = "" if target_class is None else str(target_class).strip()
    if target_text:
        try:
            target_index = int(target_text)
        except ValueError:
            raise gr.Error("Target Category must be an integer or left blank.") from None
    else:
        target_index = None

    image_dims = (224, 224)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    explainer = XGradCAM(namespace["model"], namespace["target_layer"],
                         target_index, image, image_dims, device)

    return explainer.visualize()
|
149 |
+
|
150 |
+
def eigencam(image, model_code, class_code, process_code, reshape_code):
    """Gradio callback that renders an EigenCAM overlay for ``image``.

    The four snippets are executed in order in one shared namespace. The
    image, resized to 640x640, is available to them as ``input_image``.

    Args:
        image: Input image array supplied by the Gradio image component.
        model_code: Python source that must define ``model`` and
            ``target_layer``.
        class_code: Python source defining the class names and colours used
            by ``process_code``.
        process_code: Python source that must define
            ``bounding_box_coordinates``, ``class_names`` and ``box_colors``.
        reshape_code: Python source that may define a ``reshape`` function;
            if it does not, no reshape transform is used.

    Returns:
        The image returned by ``EigenCAM.visualize()``.

    Raises:
        gr.Error: If no image was provided or the snippets do not define the
            required names.
    """
    if image is None:
        raise gr.Error("Please provide an input image.")

    # SECURITY: exec() runs arbitrary user-supplied code with the privileges
    # of this process. That is the purpose of this demo, but it must not be
    # exposed to untrusted users without sandboxing.
    # The snippets run in a per-call copy of the module namespace so they can
    # still use names such as ``torch``, while their results cannot leak into
    # other requests or the other tab. Result names are removed first so a
    # stale value is never picked up silently.
    namespace = dict(globals())
    required = ("model", "target_layer", "bounding_box_coordinates", "class_names", "box_colors")
    for name in required + ("reshape", "input_image"):
        namespace.pop(name, None)
    namespace["input_image"] = cv2.resize(image, (640, 640))

    for snippet in (model_code, class_code, process_code, reshape_code):
        exec(snippet, namespace)

    missing = [name for name in required if name not in namespace]
    if missing:
        raise gr.Error("The provided code must define: " + ", ".join(missing))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    explainer = EigenCAM(namespace["model"], namespace["target_layer"],
                         namespace["bounding_box_coordinates"], namespace["class_names"],
                         namespace["box_colors"], namespace.get("reshape"),
                         namespace["input_image"], device)

    return explainer.visualize()
|
161 |
+
|
162 |
+
# Gradio UI: one tab per explainer. Each tab collects an input image plus
# editable code snippets and passes them to the matching callback.
# NOTE(review): the nesting of the output image (beside the input column) and
# of the submit button (below the row) is reconstructed — confirm against the
# intended layout.
# The multi-line string contents are kept at column 0 so the Markdown text and
# the default code snippets do not depend on any dedenting.
with gr.Blocks() as demo:
    gr.Markdown(
"""
# Class Activation Mapping (CAM) Explainer Demo
This is a demo for CAM explainer from Intel XAI tools (https://github.com/IntelAI/intel-xai-tools). \
CAM is an approach which localizes regions in the image responsible for a class prediction. \
This demo shows visualization of XGradCAM for object classification model and EigenCAM for object detection model.
"""
    )

    with gr.Tab("XGradCAM"):
        with gr.Row():
            with gr.Column():
                xgradcam_image = gr.Image(label="Input Image")
                gr.Markdown(
"""
Load the pretrained model to the variable <code>model</code> depending on how it was saved. Then, specify <code>target_layer</code> (normally the last convolutional layer) to compute CAM for. \
Here are some common choices:
- FasterRCNN: <code>model.backbone</code>
- ResNet18 and 50: <code>model.layer4</code>
- VGG and DenseNet161: <code>model.features</code>

Please don't change the variable names in the following code.
"""
                )
                xgradcam_model = gr.Code(label="Model and Target Layer", value=
"""
from torchvision.models import resnet50, ResNet50_Weights

model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
target_layer = model.layer4
""", language="python")
                gr.Markdown(
"""
Enter the target category as an integer to compute CAM for. It is the category index in the range <code>[0, NUM_OF_CLASSES-1]</code> based on the training dataset. \
If it is left blank, the highest scoring category will be used.
"""
                )
                xgradcam_targetClass = gr.Textbox(label="Target Category")
            xgradcam_output = gr.Image()
        xgradcam_button = gr.Button("Submit")

    with gr.Tab("EigenCAM"):
        with gr.Row():
            with gr.Column():
                eigencam_image = gr.Image(label="Input Image")
                gr.Markdown(
"""
Load the pretrained model to the variable <code>model</code> depending on how it was saved. Then, specify <code>target_layer</code> (normally the last convolutional layer) to compute CAM for. \
Here are some common choices:
- FasterRCNN: <code>model.backbone</code>
- ResNet18 and 50: <code>model.layer4</code>
- VGG and DenseNet161: <code>model.features</code>

Please don't change the variable names in the following code.
"""
                )
                # The default snippet uses the ``weights=`` API, matching the
                # XGradCAM tab, instead of the deprecated ``pretrained=True``.
                eigencam_model = gr.Code(label="Model and Target Layer", value=
"""
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights

model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT).eval()
target_layer = model.backbone
""", language="python")
                gr.Markdown(
"""
In the case there is no class name in the output from the model, specify <code>class_labels</code> as a list to print them with corresponding bounding box in the image. \
Depending on the model, the class name might not be needed (e.g. YOLO). Then, create <code>color</code> as a list with a size of the number of classes.
"""
                )
                eigencam_class = gr.Code(label="Class Name", value=
"""
import numpy as np

class_labels = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
                'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A',
                'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella',
                'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
                'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
                'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass', 'cup', 'fork',
                'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
                'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
                'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A', 'toilet',
                'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
                'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 'clock', 'vase',
                'scissors', 'teddy bear', 'hair drier', 'toothbrush']
color = np.random.uniform(0, 255, size=(len(class_labels), 3))
""", language="python")
                gr.Markdown(
"""
Get <code>output</code> of the model (in the case of FasterRCNN, convert <code>input_image</code> to a tensor first). Then, write a custom <code>process_output</code> function to process the outputs from the model. \
You should get <code>bounding_box_coordinates</code>, <code>class_names</code>, and <code>box_colors</code> of the detected objects with a higher detection score than <code>detection_threshold</code> value. \
If you use other models than FasterRCNN, you need to make your own custom process function to match the structure of the outputs from this function.
"""
                )
                eigencam_process = gr.Code(label="Output Processing", value=
"""
import torchvision

transform = torchvision.transforms.ToTensor()
input_tensor = transform(np.float32(input_image) / 255).unsqueeze(0)
output = model(input_tensor)[0]

def process_output(output, class_labels, color, detection_threshold):
    boxes, classes, labels, colors = [], [], [], []
    box = output['boxes'].tolist()
    name = [class_labels[i] for i in output['labels'].detach().numpy()]
    label = output['labels'].detach().numpy()
    for i in range(len(name)):
        score = output['scores'].detach().numpy()[i]
        if score < detection_threshold:
            continue
        boxes.append([int(b) for b in box[i]])
        classes.append(name[i])
        colors.append(color[label[i]])

    return boxes, classes, colors

detection_threshold = 0.9
bounding_box_coordinates, class_names, box_colors = process_output(output, class_labels, color, detection_threshold)
""", language="python")
                gr.Markdown(
"""
Write a custom <code>reshape</code> function to get the activations from the model and process them into 2D format. \
For example, the backbone of FasterRCNN outputs 5 different tensors with different spatial size as an Ordered Dict, \
thus, we need a custom function which aggregates these image tensors, resizes them to a common shape, and concatenates them. \
If you use other models than FasterRCNN, you need to write your own custom reshape function.
"""
                )
                eigencam_reshape = gr.Code(label="Reshape", value=
"""
def reshape(x):
    target_size = x['pool'].size()[-2 : ]
    activations = []
    for key, value in x.items():
        activations.append(torch.nn.functional.interpolate(torch.abs(value), target_size, mode='bilinear'))
    activations = torch.cat(activations, axis=1)

    return activations
""", language="python")
            eigencam_output = gr.Image()
        eigencam_button = gr.Button("Submit")

    # Wire each submit button to its callback; the input order matches the
    # callback's parameter order.
    xgradcam_button.click(xgradcam, inputs=[xgradcam_image, xgradcam_model, xgradcam_targetClass], outputs=xgradcam_output)
    eigencam_button.click(eigencam, inputs=[eigencam_image, eigencam_model, eigencam_class, eigencam_process, eigencam_reshape], outputs=eigencam_output)

demo.launch()
|
requirements.txt
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
grad-cam
|
2 |
+
matplotlib
|
3 |
+
numpy>=1.14.3,<1.23.0
|
4 |
+
opencv-python
|
5 |
+
scipy
|
6 |
+
torch==1.13.1
|