yoonhachoe committed on
Commit
8d763c6
1 Parent(s): fc9f69f

initial commit

Browse files
Files changed (2) hide show
  1. app.py +309 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import cv2
4
+
5
+ ### CAM explainer code from Intel XAI tools (https://github.com/IntelAI/intel-xai-tools) ###
6
class XGradCAM:
    """XGradCAM explainer for an image-classification model.

    Thin wrapper around ``pytorch_grad_cam.XGradCAM`` that resizes the input
    image, normalises it, runs the CAM and returns the heat-map overlay.
    """

    def __init__(self, model, targetLayer, targetClass, image, dims, device):
        """Store the explainer inputs and make every model parameter trainable.

        Args:
            model: PyTorch module to explain.
            targetLayer: Layer whose activations the CAM is computed for.
            targetClass: Class index wrapped in ``ClassifierOutputTarget``,
                or ``None`` to pass ``targets=None`` to the CAM.
            image: Input image array; a 2-D array is treated as grayscale.
            dims: Size tuple forwarded to ``cv2.resize``.
            device: Device the model and the input tensor are moved to.
        """
        # set any frozen layers to trainable
        # gradcam cannot be calculated without it
        for param in model.parameters():
            if not param.requires_grad:
                param.requires_grad = True

        self.model = model
        self.targetLayer = targetLayer
        self.targetClass = targetClass
        self.image = image
        self.dims = dims
        self.device = device

    def visualize(self):
        """Compute the XGradCAM heat map and overlay it on the resized image.

        Returns:
            The array produced by ``show_cam_on_image(..., use_rgb=True)``.
        """
        # Aliased so the library class does not shadow this wrapper class.
        from pytorch_grad_cam import XGradCAM as _XGradCAM
        from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
        from pytorch_grad_cam.utils.image import show_cam_on_image, preprocess_image
        import torch
        import cv2
        import numpy as np

        self.model.eval().to(self.device)

        image = cv2.resize(self.image, self.dims)
        # convert to rgb if image is grayscale
        is_grayscale = image.ndim == 2
        if is_grayscale:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        rgb_img = np.float32(image) / 255
        input_tensor = preprocess_image(rgb_img,
                                        mean=[0.485, 0.456, 0.406],
                                        std=[0.229, 0.224, 0.225])
        input_tensor = input_tensor.to(self.device)
        # convert back to grayscale if that is the initial dim
        if is_grayscale:
            input_tensor = input_tensor[:, 0:1, :, :]

        if self.targetClass is None:
            targets = None
        else:
            targets = [ClassifierOutputTarget(self.targetClass)]

        # Build the layer list locally: rebinding self.targetLayer here would
        # nest the list one level deeper on every call to visualize().
        cam = _XGradCAM(self.model, [self.targetLayer],
                        use_cuda=torch.cuda.is_available())
        grayscale_cam = cam(input_tensor=input_tensor, targets=targets,
                            aug_smooth=False, eigen_smooth=False)
        grayscale_cam = grayscale_cam[0, :]

        # Only the CAM overlay is returned. The previous guided-backprop pass
        # was computed and discarded, and the two consecutive RGB<->BGR swaps
        # cancelled each other out, so both were dropped.
        cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)

        print("XGradCAM is generated. ")

        return cam_image
74
+
75
class EigenCAM:
    """EigenCAM explainer for an object-detection model.

    Thin wrapper around ``pytorch_grad_cam.EigenCAM`` that renormalises the
    heat map inside each detected box and draws the boxes and class names.
    """

    def __init__(self, model, targetLayer, boxes, classes, colors, reshape, image, device):
        """Store the explainer inputs.

        Args:
            model: PyTorch module to explain.
            targetLayer: Layer whose activations the CAM is computed for.
            boxes: Iterable of ``(x1, y1, x2, y2)`` integer pixel boxes.
            classes: Class name for each box, indexed like ``boxes``.
            colors: Drawing colour for each box, indexed like ``boxes``.
            reshape: Optional ``reshape_transform`` callable for the CAM;
                ``None`` to use the CAM without one.
            image: Input image array, scaled by 1/255 before use.
            device: Device the model and the input tensor are moved to.
        """
        self.model = model
        self.targetLayer = targetLayer
        self.boxes = boxes
        self.classes = classes
        self.colors = colors
        self.reshape = reshape
        self.image = image
        self.device = device

    def visualize(self):
        """Compute EigenCAM, renormalise it per box and draw the detections.

        Returns:
            The heat-map overlay with one rectangle and label per box.
        """
        # Aliased so the library class does not shadow this wrapper class.
        from pytorch_grad_cam import EigenCAM as _EigenCAM
        from pytorch_grad_cam.utils.image import show_cam_on_image, scale_cam_image
        import torchvision
        import torch
        import cv2
        import numpy as np

        self.model.eval().to(self.device)

        rgb_img = np.float32(self.image) / 255
        to_tensor = torchvision.transforms.ToTensor()
        input_tensor = to_tensor(rgb_img).unsqueeze(0).to(self.device)

        # Build the layer list locally: rebinding self.targetLayer here would
        # nest the list one level deeper on every call to visualize().
        cam_kwargs = {"use_cuda": torch.cuda.is_available()}
        if self.reshape is not None:
            cam_kwargs["reshape_transform"] = self.reshape
        cam = _EigenCAM(self.model, [self.targetLayer], **cam_kwargs)

        grayscale_cam = cam(input_tensor=input_tensor, targets=[],
                            aug_smooth=False, eigen_smooth=False)
        grayscale_cam = grayscale_cam[0, :]

        # Rescale the heat map inside every box, then rescale the whole map.
        renormalized_cam = np.zeros(grayscale_cam.shape, dtype=np.float32)
        for x1, y1, x2, y2 in self.boxes:
            if x2 <= x1 or y2 <= y1:
                # Zero-area box: it covers no pixels, so there is nothing to
                # renormalise for it.
                continue
            renormalized_cam[y1:y2, x1:x2] = scale_cam_image(
                grayscale_cam[y1:y2, x1:x2].copy())
        renormalized_cam = scale_cam_image(renormalized_cam)

        eigencam_image_renormalized = show_cam_on_image(
            rgb_img, renormalized_cam, use_rgb=True)
        for i, box in enumerate(self.boxes):
            color = self.colors[i]
            cv2.rectangle(
                eigencam_image_renormalized,
                (box[0], box[1]),
                (box[2], box[3]),
                color, 2
            )
            cv2.putText(eigencam_image_renormalized, self.classes[i],
                        (box[0], box[1] - 5),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2,
                        lineType=cv2.LINE_AA)

        print("EigenCAM is generated. ")

        return eigencam_image_renormalized
135
+
136
+ ### For Gradio Demo ###
137
def xgradcam(image, model_code, target_class):
    """Gradio callback: run XGradCAM on ``image`` with a user-defined model.

    Args:
        image: Input image array from the ``gr.Image`` component.
        model_code: Python source that must define the globals ``model`` and
            ``target_layer``.
        target_class: Target category as text (or an int); blank or ``None``
            means no explicit target is passed to the explainer.

    Returns:
        The overlay image returned by ``XGradCAM.visualize``.

    Raises:
        gr.Error: If no image was supplied or the target category is not an
            integer.
    """
    global model, target_layer

    if image is None:
        raise gr.Error("Please provide an input image.")

    # SECURITY: this executes arbitrary user-supplied code with the module
    # globals. It is the purpose of the demo, but it must only be exposed in
    # a sandboxed / trusted deployment.
    exec(model_code, globals())

    # Normalise first so that whitespace-only input counts as blank and an
    # integer 0 is not mistaken for "no target".
    target_text = "" if target_class is None else str(target_class).strip()
    if target_text:
        try:
            target_class = int(target_text)
        except ValueError as err:
            raise gr.Error("Target Category must be an integer.") from err
    else:
        target_class = None

    image_dims = (224, 224)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    explainer = XGradCAM(model, target_layer, target_class, image, image_dims, device)

    return explainer.visualize()
149
+
150
def eigencam(image, model_code, class_code, process_code, reshape_code):
    """Gradio callback: run EigenCAM on ``image`` with a user-defined detector.

    The four code snippets are executed in order with the module globals and
    are expected to define ``model``, ``target_layer``,
    ``bounding_box_coordinates``, ``class_names``, ``box_colors`` and,
    optionally, ``reshape``. The resized image is published as the global
    ``input_image`` before the snippets run so that they can read it.

    Args:
        image: Input image array from the ``gr.Image`` component.
        model_code: Source defining ``model`` and ``target_layer``.
        class_code: Source defining the class labels and colours.
        process_code: Source that runs the model and extracts the detections.
        reshape_code: Source defining ``reshape``; may be blank.

    Returns:
        The annotated overlay image returned by ``EigenCAM.visualize``.

    Raises:
        gr.Error: If no image was supplied.
    """
    global input_image, model, target_layer, bounding_box_coordinates, class_names, box_colors, reshape

    if image is None:
        raise gr.Error("Please provide an input image.")

    input_image = cv2.resize(image, (640, 640))

    # Reset before running the user code: otherwise blank reshape code raises
    # NameError on the first request, or silently reuses the function left
    # behind by an earlier request.
    reshape = None

    # SECURITY: this executes arbitrary user-supplied code with the module
    # globals. It is the purpose of the demo, but it must only be exposed in
    # a sandboxed / trusted deployment.
    exec(model_code, globals())
    exec(class_code, globals())
    exec(process_code, globals())
    exec(reshape_code, globals())

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    explainer = EigenCAM(model, target_layer, bounding_box_coordinates, class_names,
                         box_colors, reshape, input_image, device)

    return explainer.visualize()
161
+
162
# --- Static UI text ---------------------------------------------------------
# NOTE(review): the layout nesting and the whitespace inside these strings
# were reconstructed from a listing that had lost its indentation -- confirm
# against the rendered demo.

_INTRO_MD = """
# Class Activation Mapping (CAM) Explainer Demo
This is a demo for CAM explainer from Intel XAI tools (https://github.com/IntelAI/intel-xai-tools). \
CAM is an approach which localizes regions in the image responsible for a class prediction. \
The demo shows visualization of XGradCAM for object classification model and EigenCAM for object detection model.
"""

# Shared by both tabs (the two copies were identical).
_MODEL_HELP_MD = """
Load the pretrained model to the variable <code>model</code> depending on how it was saved. Then, specify <code>target_layer</code> (normally the last convolutional layer) to compute CAM for. \
Here are some common choices:
- FasterRCNN: <code>model.backbone</code>
- ResNet18 and 50: <code>model.layer4</code>
- VGG and DenseNet161: <code>model.features</code>

Please don't change the variable names in the following code.
"""

_TARGET_CLASS_HELP_MD = """
Enter the target category as an integer to compute CAM for. It is the category index in the range <code>[0, NUM_OF_CLASSES-1]</code> based on the training dataset. \
If it is left blank, the highest scoring category will be used.
"""

_CLASS_HELP_MD = """
In the case there is no class name in the output from the model, specify <code>class_labels</code> as a list to print them with corresponding bounding box in the image. \
Depending on the model, the class name might not be needed (e.g. YOLO). Then, create <code>color</code> as a list with a size of the number of classes.
"""

_PROCESS_HELP_MD = """
Get <code>output</code> of the model (in the case of FasterRCNN, convert <code>input_image</code> to a tensor first). Then, write a custom <code>process_output</code> function to process the outputs from the model. \
You should get <code>bounding_box_coordinates</code>, <code>class_names</code>, and <code>box_colors</code> of the detected objects with a higher detection score than <code>detection_threshold</code> value. \
If you use other models than FasterRCNN, you need to make your own custom process function to match the structure of the outputs from this function.
"""

_RESHAPE_HELP_MD = """
Write a custom <code>reshape</code> function to get the activations from the model and process them into 2D format. \
For example, the backbone of FasterRCNN outputs 5 different tensors with different spatial size as an Ordered Dict, \
thus, we need a custom function which aggregates these image tensors, resizes them to a common shape, and concatenates them. \
If you use other models than FasterRCNN, you need to write your own custom reshape function.
"""

# --- Default editor contents ------------------------------------------------
# The callbacks exec() these snippets with the module globals, so the code
# inside them must start at column 0.

_XGRADCAM_MODEL_CODE = """
from torchvision.models import resnet50, ResNet50_Weights

model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
target_layer = model.layer4
"""

# Uses the weights enum (like the classifier snippet above) instead of the
# deprecated ``pretrained=True`` flag.
_EIGENCAM_MODEL_CODE = """
from torchvision.models.detection import fasterrcnn_resnet50_fpn, FasterRCNN_ResNet50_FPN_Weights

model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT).eval()
target_layer = model.backbone
"""

_EIGENCAM_CLASS_CODE = """
import numpy as np

class_labels = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
                'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A',
                'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
                'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella',
                'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
                'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
                'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass', 'cup', 'fork',
                'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
                'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
                'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A', 'toilet',
                'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
                'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 'clock', 'vase',
                'scissors', 'teddy bear', 'hair drier', 'toothbrush']
color = np.random.uniform(0, 255, size=(len(class_labels), 3))
"""

# process_output converts the label and score tensors once, not once per
# detection, and no longer builds an unused list.
_EIGENCAM_PROCESS_CODE = """
import torchvision

transform = torchvision.transforms.ToTensor()
input_tensor = transform(np.float32(input_image) / 255).unsqueeze(0)
output = model(input_tensor)[0]

def process_output(output, class_labels, color, detection_threshold):
    boxes, classes, colors = [], [], []
    labels = output['labels'].detach().cpu().numpy()
    scores = output['scores'].detach().cpu().numpy()
    for box, label, score in zip(output['boxes'].tolist(), labels, scores):
        if score < detection_threshold:
            continue
        boxes.append([int(b) for b in box])
        classes.append(class_labels[label])
        colors.append(color[label])

    return boxes, classes, colors

detection_threshold = 0.9
bounding_box_coordinates, class_names, box_colors = process_output(output, class_labels, color, detection_threshold)
"""

_EIGENCAM_RESHAPE_CODE = """
def reshape(x):
    target_size = x['pool'].size()[-2 : ]
    activations = []
    for key, value in x.items():
        activations.append(torch.nn.functional.interpolate(torch.abs(value), target_size, mode='bilinear'))
    activations = torch.cat(activations, axis=1)

    return activations
"""

# --- UI ---------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown(_INTRO_MD)

    with gr.Tab("XGradCAM"):
        with gr.Row():
            with gr.Column():
                xgradcam_image = gr.Image(label="Input Image")
                gr.Markdown(_MODEL_HELP_MD)
                xgradcam_model = gr.Code(label="Model and Target Layer",
                                         value=_XGRADCAM_MODEL_CODE, language="python")
                gr.Markdown(_TARGET_CLASS_HELP_MD)
                xgradcam_targetClass = gr.Textbox(label="Target Category")
            xgradcam_output = gr.Image()
        xgradcam_button = gr.Button("Submit")

    with gr.Tab("EigenCAM"):
        with gr.Row():
            with gr.Column():
                eigencam_image = gr.Image(label="Input Image")
                gr.Markdown(_MODEL_HELP_MD)
                eigencam_model = gr.Code(label="Model and Target Layer",
                                         value=_EIGENCAM_MODEL_CODE, language="python")
                gr.Markdown(_CLASS_HELP_MD)
                eigencam_class = gr.Code(label="Class Name",
                                         value=_EIGENCAM_CLASS_CODE, language="python")
                gr.Markdown(_PROCESS_HELP_MD)
                eigencam_process = gr.Code(label="Output Processing",
                                           value=_EIGENCAM_PROCESS_CODE, language="python")
                gr.Markdown(_RESHAPE_HELP_MD)
                eigencam_reshape = gr.Code(label="Reshape",
                                           value=_EIGENCAM_RESHAPE_CODE, language="python")
            eigencam_output = gr.Image()
        eigencam_button = gr.Button("Submit")

    # Wire each Submit button to its callback.
    xgradcam_button.click(
        xgradcam,
        inputs=[xgradcam_image, xgradcam_model, xgradcam_targetClass],
        outputs=xgradcam_output,
    )
    eigencam_button.click(
        eigencam,
        inputs=[eigencam_image, eigencam_model, eigencam_class, eigencam_process, eigencam_reshape],
        outputs=eigencam_output,
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ grad-cam
2
+ matplotlib
3
+ numpy>=1.14.3,<1.23.0
4
+ opencv-python
5
+ scipy
6
+ torch==1.13.1