Spaces:

Intel
/

intel-xai-tools-cam-demo

Runtime error

App Files Files Community

yoonhachoe committed on Apr 20, 2023

Commit

8d763c6

•

1 Parent(s): fc9f69f

initial commit

Browse files

Files changed (2) hide show

app.py +309 -0
requirements.txt +6 -0

app.py ADDED Viewed

	@@ -0,0 +1,309 @@

+import gradio as gr
+import torch
+import cv2
+### CAM explainer code from Intel XAI tools (https://github.com/IntelAI/intel-xai-tools) ###
+class XGradCAM:
+    def __init__(self, model, targetLayer, targetClass, image, dims, device):
+        # set any frozen layers to trainable
+        # gradcam cannot be calculated without it
+        for param in model.parameters():
+            if not param.requires_grad:
+                param.requires_grad = True
+        self.model = model
+        self.targetLayer = targetLayer
+        self.targetClass = targetClass
+        self.image = image
+        self.dims = dims
+        self.device = device
+    def visualize(self):
+        from pytorch_grad_cam import XGradCAM, GuidedBackpropReLUModel
+        from pytorch_grad_cam.utils.model_targets import ClassifierOutputTarget
+        from pytorch_grad_cam.utils.image import show_cam_on_image, deprocess_image, preprocess_image
+        import torch
+        import cv2
+        import numpy as np
+        import matplotlib.pyplot as plt
+        self.model.eval().to(self.device)
+        image = cv2.resize(self.image, self.dims)
+        # convert to rgb if image is grayscale
+        converted = False
+        if len(image.shape) == 2:
+            converted = True
+            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
+        rgb_img = np.float32(image) / 255
+        input_tensor = preprocess_image(rgb_img,
+                                        mean=[0.485, 0.456, 0.406],
+                                        std=[0.229, 0.224, 0.225])
+        input_tensor = input_tensor.to(self.device)
+        self.targetLayer = [self.targetLayer]
+        if self.targetClass is None:
+            targets = None
+        else:
+            targets = [ClassifierOutputTarget(self.targetClass)]
+        cam = XGradCAM(self.model, self.targetLayer, use_cuda=torch.cuda.is_available())
+        # convert back to grayscale if that is the initial dim
+        if converted:
+            input_tensor = input_tensor[:, 0:1, :, :]
+        grayscale_cam = cam(input_tensor=input_tensor, targets=targets, aug_smooth=False,
+                            eigen_smooth=False)
+        grayscale_cam = grayscale_cam[0, :]
+        cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)
+        cam_image = cv2.cvtColor(cam_image, cv2.COLOR_RGB2BGR)
+        gb_model = GuidedBackpropReLUModel(model=self.model, use_cuda=torch.cuda.is_available())
+        gb = gb_model(input_tensor, target_category=None)
+        cam_mask = cv2.merge([grayscale_cam, grayscale_cam, grayscale_cam])
+        cam_gb = deprocess_image(cam_mask * gb)
+        gb = deprocess_image(gb)
+        print("XGradCAM, Guided backpropagation, and Guided XGradCAM are generated. ")
+        return cv2.cvtColor(cam_image, cv2.COLOR_RGB2BGR)
+class EigenCAM:
+    def __init__(self, model, targetLayer, boxes, classes, colors, reshape, image, device):
+        self.model = model
+        self.targetLayer = targetLayer
+        self.boxes = boxes
+        self.classes = classes
+        self.colors = colors
+        self.reshape = reshape
+        self.image = image
+        self.device = device
+    def visualize(self):
+        from pytorch_grad_cam import EigenCAM
+        from pytorch_grad_cam.utils.image import show_cam_on_image, preprocess_image, scale_cam_image
+        import torchvision
+        import torch
+        import cv2
+        import numpy as np
+        self.model.eval().to(self.device)
+        rgb_img = np.float32(self.image) / 255
+        transform = torchvision.transforms.ToTensor()
+        input_tensor = transform(rgb_img)
+        input_tensor = input_tensor.unsqueeze(0)
+        input_tensor = input_tensor.to(self.device)
+        self.targetLayer = [self.targetLayer]
+        if self.reshape is None:
+            cam = EigenCAM(self.model, self.targetLayer, use_cuda=torch.cuda.is_available())
+        else:
+            cam = EigenCAM(self.model, self.targetLayer, use_cuda=torch.cuda.is_available(),
+                           reshape_transform=self.reshape)
+        targets = []
+        grayscale_cam = cam(input_tensor=input_tensor, targets=targets, aug_smooth=False,
+                            eigen_smooth=False)
+        grayscale_cam = grayscale_cam[0, :]
+        cam_image = show_cam_on_image(rgb_img, grayscale_cam, use_rgb=True)
+        renormalized_cam = np.zeros(grayscale_cam.shape, dtype=np.float32)
+        for x1, y1, x2, y2 in self.boxes:
+            renormalized_cam[y1:y2, x1:x2] = scale_cam_image(grayscale_cam[y1:y2, x1:x2].copy())
+        renormalized_cam = scale_cam_image(renormalized_cam)
+        eigencam_image_renormalized = show_cam_on_image(rgb_img, renormalized_cam, use_rgb=True)
+        for i, box in enumerate(self.boxes):
+            color = self.colors[i]
+            cv2.rectangle(
+                eigencam_image_renormalized,
+                (box[0], box[1]),
+                (box[2], box[3]),
+                color, 2
+            )
+            cv2.putText(eigencam_image_renormalized, self.classes[i], (box[0], box[1] - 5),
+                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2,
+                        lineType=cv2.LINE_AA)
+        print("EigenCAM is generated. ")
+        return eigencam_image_renormalized
+### For Gradio Demo ###
+def xgradcam(image, model_code, target_class):
+    global model, target_layer
+    exec(model_code, globals())
+    if target_class == "":
+        target_class = None
+    else:
+        target_class = int(target_class)
+    image_dims = (224, 224)
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    xgradcam = XGradCAM(model, target_layer, target_class, image, image_dims, device)
+    return xgradcam.visualize()
+def eigencam(image, model_code, class_code, process_code, reshape_code):
+    global input_image, model, target_layer, bounding_box_coordinates, class_names, box_colors, reshape
+    input_image = cv2.resize(image, (640, 640))
+    exec(model_code, globals())
+    exec(class_code, globals())
+    exec(process_code, globals())
+    exec(reshape_code, globals())
+    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    eigencam = EigenCAM(model, target_layer, bounding_box_coordinates, class_names, box_colors, reshape, input_image, device)
+    return eigencam.visualize()
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """
+        # Class Activation Mapping (CAM) Explainer Demo
+        This is a demo for CAM explainer from Intel XAI tools (https://github.com/IntelAI/intel-xai-tools). \
+        CAM is an approach which localizes regions in the image responsible for a class prediction. \
+         demo shows visualization of XGradCAM for object classification model and EigenCAM for object detection model.
+        """
+    )
+    with gr.Tab("XGradCAM"):
+        with gr.Row():
+            with gr.Column():
+                xgradcam_image = gr.Image(label="Input Image")
+                gr.Markdown(
+                    """
+                    Load the pretrained model to the variable <code>model</code> depending on how it was saved. Then, specify <code>target_layer</code> (normally the last convolutional layer) to compute CAM for. \
+                    Here are some common choices:
+                      - FasterRCNN: <code>model.backbone</code>
+                      - ResNet18 and 50: <code>model.layer4</code>
+                      - VGG and DenseNet161: <code>model.features</code>
+                    Please don't change the variable names in the following code.
+                    """
+                )
+                xgradcam_model = gr.Code(label="Model and Target Layer", value=
+                    """
+                    from torchvision.models import resnet50, ResNet50_Weights
+                    model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)
+                    target_layer = model.layer4
+                    """, language="python")
+                gr.Markdown(
+                    """
+                    Enter the target category as an integer to compute CAM for. It is the category index in the range <code>[0, NUM_OF_CLASSES-1]</code> based on the training dataset. \
+                    If it is left blank, the highest scoring category will be used.
+                    """
+                )
+                xgradcam_targetClass = gr.Textbox(label="Target Category")
+            xgradcam_output = gr.Image()
+        xgradcam_button = gr.Button("Submit")
+    with gr.Tab("EigenCAM"):
+        with gr.Row():
+            with gr.Column():
+                eigencam_image = gr.Image(label="Input Image")
+                gr.Markdown(
+                    """
+                    Load the pretrained model to the variable <code>model</code> depending on how it was saved. Then, specify <code>target_layer</code> (normally the last convolutional layer) to compute CAM for. \
+                    Here are some common choices:
+                      - FasterRCNN: <code>model.backbone</code>
+                      - ResNet18 and 50: <code>model.layer4</code>
+                      - VGG and DenseNet161: <code>model.features</code>
+                    Please don't change the variable names in the following code.
+                    """
+                )
+                eigencam_model = gr.Code(label="Model and Target Layer", value=
+                    """
+                    from torchvision.models.detection import fasterrcnn_resnet50_fpn
+                    model = fasterrcnn_resnet50_fpn(pretrained=True).eval()
+                    target_layer = model.backbone
+                    """, language="python")
+                gr.Markdown(
+                    """
+                    In the case there is no class name in the output from the model, specify <code>class_labels</code> as a list to print them with corresponding bounding box in the image. \
+                    Depending on the model, the class name might not be needed (e.g. YOLO). Then, create <code>color</code> as a list with a size of the number of classes.
+                    """
+                )
+                eigencam_class = gr.Code(label="Class Name", value=
+                    """
+                    import numpy as np
+                    class_labels = ['__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
+                        'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A',
+                        'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',
+                        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella',
+                        'N/A', 'N/A', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',
+                        'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard',
+                        'surfboard', 'tennis racket', 'bottle', 'N/A', 'wine glass', 'cup', 'fork',
+                        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
+                        'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+                        'potted plant', 'bed', 'N/A', 'dining table', 'N/A', 'N/A', 'toilet',
+                        'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
+                        'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book', 'clock', 'vase',
+                        'scissors', 'teddy bear', 'hair drier', 'toothbrush']
+                    color = np.random.uniform(0, 255, size=(len(class_labels), 3))
+                    """, language="python")
+                gr.Markdown(
+                    """
+                    Get <code>output</code> of the model (in the case of FasterRCNN, convert <code>input_image</code> to a tensor first). Then, write a custom <code>process_output</code> function to process the outputs from the model. \
+                    You should get <code>bounding_box_coordinates</code>, <code>class_names</code>, and <code>box_colors</code> of the detected objects with a higher detection score than <code>detection_threshold</code> value. \
+                    If you use other models than FasterRCNN, you need to make your own custom process function to match the structure of the outputs from this function.
+                    """
+                )
+                eigencam_process = gr.Code(label="Output Processing", value=
+                    """
+                    import torchvision
+                    transform = torchvision.transforms.ToTensor()
+                    input_tensor = transform(np.float32(input_image) / 255).unsqueeze(0)
+                    output = model(input_tensor)[0]
+                    def process_output(output, class_labels, color, detection_threshold):
+                        boxes, classes, labels, colors = [], [], [], []
+                        box = output['boxes'].tolist()
+                        name = [class_labels[i] for i in output['labels'].detach().numpy()]
+                        label = output['labels'].detach().numpy()
+                        for i in range(len(name)):
+                            score = output['scores'].detach().numpy()[i]
+                            if score < detection_threshold:
+                                continue
+                            boxes.append([int(b) for b in box[i]])
+                            classes.append(name[i])
+                            colors.append(color[label[i]])
+                        return boxes, classes, colors
+                    detection_threshold = 0.9
+                    bounding_box_coordinates, class_names, box_colors = process_output(output, class_labels, color, detection_threshold)
+                    """, language="python")
+                gr.Markdown(
+                    """
+                    Write a custom <code>reshape</code> function to get the activations from the model and process them into 2D format. \
+                    For example, the backbone of FasterRCNN outputs 5 different tenors with different spatial size as an Ordered Dict, \
+                    thus, we need a custom function which aggregates these image tensors, resizes them to a common shape, and concatenates them. \
+                    If you use other models than FasterRCNN, you need to write your own custom reshape function.
+                    """
+                )
+                eigencam_reshape = gr.Code(label="Reshape", value=
+                    """
+                    def reshape(x):
+                        target_size = x['pool'].size()[-2 : ]
+                        activations = []
+                        for key, value in x.items():
+                            activations.append(torch.nn.functional.interpolate(torch.abs(value), target_size, mode='bilinear'))
+                        activations = torch.cat(activations, axis=1)
+                        return activations
+                    """, language="python")
+            eigencam_output = gr.Image()
+        eigencam_button = gr.Button("Submit")
+    xgradcam_button.click(xgradcam, inputs=[xgradcam_image, xgradcam_model, xgradcam_targetClass], outputs=xgradcam_output)
+    eigencam_button.click(eigencam, inputs=[eigencam_image, eigencam_model, eigencam_class, eigencam_process, eigencam_reshape], outputs=eigencam_output)
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+# app.py passes use_cuda= to the grad-cam classes; grad-cam 1.5.0+ no longer accepts it.
+grad-cam<1.5.0
+matplotlib
+numpy>=1.14.3,<1.23.0
+opencv-python
+scipy
+torch==1.13.1
+# app.py imports torchvision directly; 0.14.1 is the release paired with torch 1.13.1.
+torchvision==0.14.1