# NOTE: hosting-page status text captured along with this file
# ("Spaces: Runtime error"); it is not part of the program.
import cv2
import gradio as gr
import torch
### CAM explainer code from Intel XAI tools (https://github.com/IntelAI/intel-xai-tools) ###
class XGradCAM:
    """XGradCAM explainer wrapper around ``pytorch_grad_cam.XGradCAM``.

    Args:
        model: Model to explain. Every parameter is switched to
            ``requires_grad=True`` because the CAM needs gradients.
        targetLayer: Layer whose activations the CAM is computed for.
        targetClass: Class index to explain, or ``None`` to let the CAM
            library pick the target itself.
        image: Input image array; a 2-D array is treated as grayscale.
        dims: Size passed to ``cv2.resize`` before inference.
        device: Device the model and the input tensor are moved to.
    """

    def __init__(self, model, targetLayer, targetClass, image, dims, device):
        # set any frozen layers to trainable
        # gradcam cannot be calculated without it
        for param in model.parameters():
            if not param.requires_grad:
                param.requires_grad = True
        self.model = model
        self.targetLayer = targetLayer
        self.targetClass = targetClass
        self.image = image
        self.dims = dims
        self.device = device

    def visualize(self):
        """Compute the XGradCAM heat map and return it blended over the image.

        Returns:
            The array produced by ``show_cam_on_image(..., use_rgb=True)``.

        Raises:
            ValueError: If no image was supplied.
        """
        import cv2
        import numpy as np
        import torch
        from pytorch_grad_cam import GuidedBackpropReLUModel
        # Aliased so the library class does not shadow this wrapper class.
        from pytorch_grad_cam import XGradCAM as _XGradCAM
        from pytorch_grad_cam.utils.image import (
            deprocess_image,
            preprocess_image,
            show_cam_on_image,
        )
        from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget

        if self.image is None:
            raise ValueError("XGradCAM requires an input image, got None.")

        # NOTE(review): the `use_cuda` keyword appears to have been removed in
        # newer pytorch-grad-cam releases -- confirm against the pinned version.
        use_cuda = torch.cuda.is_available()

        self.model.eval().to(self.device)
        image = cv2.resize(self.image, self.dims)

        # convert to rgb if image is grayscale
        is_grayscale = image.ndim == 2
        if is_grayscale:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

        rgb_img = np.float32(image) / 255
        input_tensor = preprocess_image(
            rgb_img,
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ).to(self.device)

        # Wrap into a *local* list: rebinding self.targetLayer here would nest
        # the list one level deeper on every repeated visualize() call.
        target_layers = [self.targetLayer]
        if self.targetClass is None:
            targets = None
        else:
            targets = [ClassifierOutputTarget(self.targetClass)]

        cam = _XGradCAM(self.model, target_layers, use_cuda=use_cuda)

        # convert back to grayscale if that is the initial dim
        if is_grayscale:
            input_tensor = input_tensor[:, 0:1, :, :]

        grayscale_cam = cam(
            input_tensor=input_tensor,
            targets=targets,
            aug_smooth=False,
            eigen_smooth=False,
        )[0, :]
        cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)

        # NOTE(review): the guided-backprop results below are computed (the
        # log line announces them) but nothing in this method returns them.
        gb_model = GuidedBackpropReLUModel(model=self.model, use_cuda=use_cuda)
        guided_backprop = gb_model(input_tensor, target_category=None)
        cam_mask = cv2.merge([grayscale_cam, grayscale_cam, grayscale_cam])
        guided_cam = deprocess_image(cam_mask * guided_backprop)
        guided_backprop = deprocess_image(guided_backprop)

        print("XGradCAM, Guided backpropagation, and Guided XGradCAM are generated. ")
        # The original converted RGB->BGR twice in a row, which restores the
        # initial channel order; returning the overlay directly is equivalent.
        return cam_image
class EigenCAM:
    """EigenCAM explainer wrapper around ``pytorch_grad_cam.EigenCAM``.

    Args:
        model: Model to explain.
        targetLayer: Layer whose activations the CAM is computed for.
        boxes: Iterable of ``(x1, y1, x2, y2)`` integer pixel boxes.
        classes: Label text drawn for each box (indexed like ``boxes``).
        colors: Drawing colour for each box (indexed like ``boxes``).
        reshape: Optional ``reshape_transform`` callable forwarded to the CAM
            library, or ``None`` to use the library default.
        image: Input image array with values on a 0-255 scale.
        device: Device the model and the input tensor are moved to.
    """

    def __init__(self, model, targetLayer, boxes, classes, colors, reshape, image, device):
        self.model = model
        self.targetLayer = targetLayer
        self.boxes = boxes
        self.classes = classes
        self.colors = colors
        self.reshape = reshape
        self.image = image
        self.device = device

    def visualize(self):
        """Return the image overlaid with a per-box renormalised EigenCAM.

        The CAM is rescaled inside every box, rescaled once more as a whole,
        blended over the image, and finally each box and its label are drawn.

        Returns:
            The annotated overlay array from ``show_cam_on_image``.
        """
        import cv2
        import numpy as np
        import torch
        import torchvision
        # Aliased so the library class does not shadow this wrapper class.
        from pytorch_grad_cam import EigenCAM as _EigenCAM
        from pytorch_grad_cam.utils.image import scale_cam_image, show_cam_on_image

        self.model.eval().to(self.device)

        rgb_img = np.float32(self.image) / 255
        to_tensor = torchvision.transforms.ToTensor()
        input_tensor = to_tensor(rgb_img).unsqueeze(0).to(self.device)

        # NOTE(review): the `use_cuda` keyword appears to have been removed in
        # newer pytorch-grad-cam releases -- confirm against the pinned version.
        cam_kwargs = {"use_cuda": torch.cuda.is_available()}
        if self.reshape is not None:
            cam_kwargs["reshape_transform"] = self.reshape
        # Wrap into a *local* list: rebinding self.targetLayer here would nest
        # the list one level deeper on every repeated visualize() call.
        cam = _EigenCAM(self.model, [self.targetLayer], **cam_kwargs)

        grayscale_cam = cam(
            input_tensor=input_tensor,
            targets=[],
            aug_smooth=False,
            eigen_smooth=False,
        )[0, :]

        renormalized_cam = np.zeros(grayscale_cam.shape, dtype=np.float32)
        for x1, y1, x2, y2 in self.boxes:
            # A zero-area box has nothing to rescale and would make the
            # min/max normalisation fail on an empty slice.
            if x2 <= x1 or y2 <= y1:
                continue
            renormalized_cam[y1:y2, x1:x2] = scale_cam_image(
                grayscale_cam[y1:y2, x1:x2].copy()
            )
        renormalized_cam = scale_cam_image(renormalized_cam)
        eigencam_image_renormalized = show_cam_on_image(
            rgb_img, renormalized_cam, use_rgb=True
        )

        for i, box in enumerate(self.boxes):
            color = self.colors[i]
            cv2.rectangle(
                eigencam_image_renormalized,
                (box[0], box[1]),
                (box[2], box[3]),
                color,
                2,
            )
            cv2.putText(
                eigencam_image_renormalized,
                self.classes[i],
                (box[0], box[1] - 5),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.8,
                color,
                2,
                lineType=cv2.LINE_AA,
            )

        print("EigenCAM is generated. ")
        return eigencam_image_renormalized
### For Gradio Demo ###
def xgradcam(image, model_code, target_class):
    """Gradio callback: run XGradCAM on ``image`` with a user-defined model.

    Args:
        image: Input image from the Gradio image component.
        model_code: Python source that must define ``model`` and
            ``target_layer``.
        target_class: Class index as text; blank selects no explicit target.

    Returns:
        The overlay image returned by ``XGradCAM.visualize()``.

    Raises:
        gr.Error: If the image is missing, ``model_code`` does not define the
            required names, or ``target_class`` is not an integer.
    """
    if image is None:
        raise gr.Error("Please provide an input image.")

    # SECURITY: `model_code` is arbitrary Python typed into the UI and is
    # executed as-is. That is the purpose of this demo, but it must never be
    # exposed to untrusted users without sandboxing.
    # Execute in a per-call copy of the module namespace so that concurrent
    # requests do not overwrite each other's `model`, and so that a snippet
    # which forgets a name cannot silently reuse a stale value.
    namespace = dict(globals())
    required = ("model", "target_layer")
    for name in required:
        namespace.pop(name, None)
    exec(model_code, namespace)

    missing = [name for name in required if name not in namespace]
    if missing:
        raise gr.Error(
            "The model code must define: " + ", ".join(missing)
        )

    class_text = "" if target_class is None else str(target_class).strip()
    if not class_text:
        class_index = None
    else:
        try:
            class_index = int(class_text)
        except ValueError as err:
            raise gr.Error("Target Category must be an integer or blank.") from err

    image_dims = (224, 224)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    explainer = XGradCAM(
        namespace["model"],
        namespace["target_layer"],
        class_index,
        image,
        image_dims,
        device,
    )
    return explainer.visualize()
def eigencam(image, model_code, class_code, process_code, reshape_code):
    """Gradio callback: run EigenCAM on ``image`` with user-defined snippets.

    The four snippets are executed in order in one shared namespace, which is
    pre-populated with ``input_image`` (the image resized to 640x640).

    Args:
        image: Input image from the Gradio image component.
        model_code: Python source defining ``model`` and ``target_layer``.
        class_code: Python source defining the class labels and colours.
        process_code: Python source defining ``bounding_box_coordinates``,
            ``class_names`` and ``box_colors``.
        reshape_code: Python source optionally defining ``reshape``.

    Returns:
        The annotated overlay image returned by ``EigenCAM.visualize()``.

    Raises:
        gr.Error: If the image is missing or a required name is not defined
            by the snippets.
    """
    if image is None:
        raise gr.Error("Please provide an input image.")

    # SECURITY: the snippets are arbitrary Python typed into the UI and are
    # executed as-is. That is the purpose of this demo, but it must never be
    # exposed to untrusted users without sandboxing.
    # Execute in a per-call copy of the module namespace so that concurrent
    # requests do not overwrite each other's state, and so that a snippet
    # which forgets a name cannot silently reuse a stale value.
    namespace = dict(globals())
    required = (
        "model",
        "target_layer",
        "bounding_box_coordinates",
        "class_names",
        "box_colors",
    )
    for name in required + ("reshape",):
        namespace.pop(name, None)
    namespace["input_image"] = cv2.resize(image, (640, 640))

    for snippet in (model_code, class_code, process_code, reshape_code):
        exec(snippet, namespace)

    missing = [name for name in required if name not in namespace]
    if missing:
        raise gr.Error(
            "The code snippets must define: " + ", ".join(missing)
        )

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    explainer = EigenCAM(
        namespace["model"],
        namespace["target_layer"],
        namespace["bounding_box_coordinates"],
        namespace["class_names"],
        namespace["box_colors"],
        # `reshape` is optional: EigenCAM falls back to the library default
        # when it is None.
        namespace.get("reshape"),
        namespace["input_image"],
        device,
    )
    return explainer.visualize()
# Gradio UI: one tab per explainer. Each tab collects an image plus editable
# Python snippets and passes them to the matching callback.
# NOTE(review): the nesting of the layout containers below was reconstructed
# from a copy of this file that had lost its indentation -- confirm that the
# output image / submit button placement matches the intended layout.
# The text inside the triple-quoted strings is kept at column 0 on purpose:
# the code snippets are exec'd verbatim, so they must not carry extra indent.
with gr.Blocks() as demo:
    gr.Markdown(
        """
# Class Activation Mapping (CAM) Explainer Demo
This is a demo for CAM explainer from Intel XAI tools (https://github.com/IntelAI/intel-xai-tools). \
CAM is an approach which localizes regions in the image responsible for a class prediction. \
The demo shows visualization of XGradCAM for object classification model and EigenCAM for object detection model.
"""
    )

    with gr.Tab("XGradCAM"):
        with gr.Row():
            with gr.Column():
                xgradcam_image = gr.Image(label="Input Image")
                gr.Markdown(
                    """
Load the pretrained model to the variable <code>model</code> depending on how it was saved. Then, specify <code>target_layer</code> (normally the last convolutional layer) to compute CAM for. \
Here are some common choices:
- FasterRCNN: <code>model.backbone</code>
- ResNet18 and 50: <code>model.layer4</code>
- VGG and DenseNet161: <code>model.features</code>
Please don't change the variable names in the following code.
"""
                )
                xgradcam_model = gr.Code(label="Model and Target Layer", value=
"""
from torchvision.models import resnet50, ResNet50_Weights
model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
target_layer = model.layer4
""", language="python")
                gr.Markdown(
                    """
Enter the target category as an integer to compute CAM for. It is the category index in the range <code>[0, NUM_OF_CLASSES-1]</code> based on the training dataset. \
If it is left blank, the highest scoring category will be used.
"""
                )
                xgradcam_targetClass = gr.Textbox(label="Target Category")
            xgradcam_output = gr.Image()
        xgradcam_button = gr.Button("Submit")

    with gr.Tab("EigenCAM"):
        with gr.Row():
            with gr.Column():
                eigencam_image = gr.Image(label="Input Image")
                gr.Markdown(
                    """
Load the pretrained model to the variable <code>model</code> depending on how it was saved. Then, specify <code>target_layer</code> (normally the last convolutional layer) to compute CAM for. \
Here are some common choices:
- FasterRCNN: <code>model.backbone</code>
- ResNet18 and 50: <code>model.layer4</code>
- VGG and DenseNet161: <code>model.features</code>
Please don't change the variable names in the following code.
"""
                )
                eigencam_model = gr.Code(label="Model and Target Layer", value=
"""
from torchvision.models.detection import fasterrcnn_resnet50_fpn
model = fasterrcnn_resnet50_fpn(pretrained=True).eval()
target_layer = model.backbone
""", language="python")
                gr.Markdown(
                    """
In the case there is no class name in the output from the model, specify <code>class_labels</code> as a list to print them with corresponding bounding box in the image. \
Depending on the model, the class name might not be needed (e.g. YOLO). Then, create <code>color</code> as a list with a size of the number of classes.
"""
                )
                eigencam_class = gr.Code(label="Class Name", value=
"""
import numpy as np
class_labels = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
    'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
    'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella',
    'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
    'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
    'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass', 'cup', 'fork',
    'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
    'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A', 'toilet',
    'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
    'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush']
color = np.random.uniform(0, 255, size=(len(class_labels), 3))
""", language="python")
                gr.Markdown(
                    """
Get <code>output</code> of the model (in the case of FasterRCNN, convert <code>input_image</code> to a tensor first). Then, write a custom <code>process_output</code> function to process the outputs from the model. \
You should get <code>bounding_box_coordinates</code>, <code>class_names</code>, and <code>box_colors</code> of the detected objects with a higher detection score than <code>detection_threshold</code> value. \
If you use other models than FasterRCNN, you need to make your own custom process function to match the structure of the outputs from this function.
"""
                )
                eigencam_process = gr.Code(label="Output Processing", value=
"""
import torchvision
transform = torchvision.transforms.ToTensor()
input_tensor = transform(np.float32(input_image) / 255).unsqueeze(0)
output = model(input_tensor)[0]
def process_output(output, class_labels, color, detection_threshold):
    boxes, classes, labels, colors = [], [], [], []
    box = output['boxes'].tolist()
    name = [class_labels[i] for i in output['labels'].detach().numpy()]
    label = output['labels'].detach().numpy()
    for i in range(len(name)):
        score = output['scores'].detach().numpy()[i]
        if score < detection_threshold:
            continue
        boxes.append([int(b) for b in box[i]])
        classes.append(name[i])
        colors.append(color[label[i]])
    return boxes, classes, colors
detection_threshold = 0.9
bounding_box_coordinates, class_names, box_colors = process_output(output, class_labels, color, detection_threshold)
""", language="python")
                gr.Markdown(
                    """
Write a custom <code>reshape</code> function to get the activations from the model and process them into 2D format. \
For example, the backbone of FasterRCNN outputs 5 different tensors with different spatial size as an Ordered Dict, \
thus, we need a custom function which aggregates these image tensors, resizes them to a common shape, and concatenates them. \
If you use other models than FasterRCNN, you need to write your own custom reshape function.
"""
                )
                eigencam_reshape = gr.Code(label="Reshape", value=
"""
def reshape(x):
    target_size = x['pool'].size()[-2 : ]
    activations = []
    for key, value in x.items():
        activations.append(torch.nn.functional.interpolate(torch.abs(value), target_size, mode='bilinear'))
    activations = torch.cat(activations, axis=1)
    return activations
""", language="python")
            eigencam_output = gr.Image()
        eigencam_button = gr.Button("Submit")

    # Event wiring must happen inside the Blocks context.
    xgradcam_button.click(
        xgradcam,
        inputs=[xgradcam_image, xgradcam_model, xgradcam_targetClass],
        outputs=xgradcam_output,
    )
    eigencam_button.click(
        eigencam,
        inputs=[eigencam_image, eigencam_model, eigencam_class, eigencam_process, eigencam_reshape],
        outputs=eigencam_output,
    )

demo.launch()