njanakiev committed on
Commit
be419be
·
1 Parent(s): abd231a

Initial commit

Browse files
assets/cat_dog.jpg ADDED
flagged/img ndarray/0.jpg ADDED
flagged/img ndarray/1.jpg ADDED
flagged/log.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 'text','img ndarray','output','timestamp'
2
+ 'big ship','img ndarray/0.jpg','output/0.png','2022-04-16 19:37:48.314750'
3
+ 'microphone','img ndarray/1.jpg','output/1.png','2022-04-16 21:45:35.413185'
flagged/output/0.png ADDED
flagged/output/1.png ADDED
gradcam/__pycache__/utils.cpython-38.pyc ADDED
Binary file (2.77 kB). View file
 
gradcam/app.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import clip
3
+ import torch
4
+
5
+ import utils
6
+
7
# Identifier of the CLIP checkpoint handed to clip.load below.
clip_model = "RN50x4"

# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# NOTE(review): jit=False presumably selects the non-JIT model so that
# submodules such as model.visual.layer4 can be hooked — confirm.
model, preprocess = clip.load(clip_model, device=device, jit=False)
model.eval()
11
+
12
+
13
def grad_cam_fn(text, img, saliency_layer):
    """Compute a Grad-CAM overlay of ``img`` for the caption ``text``.

    Args:
        text: Caption that is tokenized and encoded with the CLIP text
            encoder; the embedding is the gradient signal passed to
            ``utils.gradCAM``.
        img: Input picture as a PIL image (the interface below is
            configured with ``type='pil'``).
        saliency_layer: Attribute name on ``model.visual`` of the layer
            whose activations are visualised, e.g. ``"layer4"``.

    Returns:
        The image returned by ``utils.getAttMap``: the resized input
        blended with the heat map.

    Raises:
        ValueError: If no image was supplied.
    """
    if img is None:
        # Fail with a readable message instead of an AttributeError on
        # ``None.resize`` further down.
        raise ValueError("An input image is required.")

    resize = model.visual.input_resolution
    img = img.resize((resize, resize))

    text_input = clip.tokenize([text]).to(device)
    # The text embedding is only used as a constant gradient signal for the
    # image encoder, so no autograd graph through the text encoder is needed.
    with torch.no_grad():
        text_feature = model.encode_text(text_input).float()
    image_input = preprocess(img).unsqueeze(0).to(device)

    attn_map = utils.gradCAM(
        model.visual,
        image_input,
        text_feature,
        getattr(model.visual, saliency_layer)
    )
    # Drop the singleton dimensions and move to NumPy for colour mapping.
    attn_map = attn_map.squeeze().detach().cpu().numpy()

    return utils.getAttMap(img, attn_map)
31
+
32
+
33
if __name__ == '__main__':
    # Input widgets, listed in the positional order of grad_cam_fn's
    # parameters: text, img, saliency_layer.
    caption_box = gr.inputs.Textbox(
        lines=1,
        label="Target Text")
    picture_box = gr.inputs.Image(
        shape=(512, 512),
        image_mode="RGB",
        type='pil',
        label='Input Image')
    layer_picker = gr.inputs.Dropdown(
        ["layer4", "layer3", "layer2", "layer1"],
        default="layer4",
        label="Saliency Layer")

    # Output widget showing the blended heat map.
    overlay_view = gr.outputs.Image(
        type="pil",
        label="Attention Map")

    # Pre-filled example rows: [text, image path, layer].
    sample_rows = [
        ['a cat lying on the floor', 'assets/cat_dog.jpg', 'layer4'],
        ['a dog sitting', 'assets/cat_dog.jpg', 'layer4']
    ]

    interface = gr.Interface(
        fn=grad_cam_fn,
        inputs=[caption_box, picture_box, layer_picker],
        outputs=overlay_view,
        examples=sample_rows,
        description="OpenAI CLIP Grad CAM")

    # Serve on port 7861 with host 0.0.0.0 and without a public share link.
    interface.launch(
        server_name='0.0.0.0',
        server_port=7861,
        share=False)
gradcam/utils.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ import matplotlib.cm
6
+ from PIL import Image
7
+
8
+
9
class Hook:
    """Forward hook that keeps a module's output and its gradient.

    Registering happens in the constructor; leaving the ``with`` block
    removes the hook again. After a forward pass ``activation`` is the
    module's output tensor, and after a backward pass ``gradient`` is the
    gradient retained on that tensor.
    """

    def __init__(self, module: nn.Module):
        # Output of the most recent forward pass through ``module``.
        self.data = None
        # Handle needed to unregister the hook in __exit__.
        self.hook = module.register_forward_hook(self.save_grad)

    def save_grad(self, module, input, output):
        """Store ``output`` and make it keep its ``.grad`` after backward."""
        self.data = output
        # requires_grad_ returns the tensor itself, so the two in-place
        # calls can be chained.
        self.data.requires_grad_(True).retain_grad()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # Unregister so later forward passes are not recorded.
        self.hook.remove()

    @property
    def activation(self) -> torch.Tensor:
        """The recorded module output (``None`` before any forward pass)."""
        return self.data

    @property
    def gradient(self) -> torch.Tensor:
        """The ``.grad`` attribute of the recorded output."""
        return self.data.grad
34
+
35
+
36
+ # Reference: https://arxiv.org/abs/1610.02391
37
def gradCAM(
    model: nn.Module,
    input: torch.Tensor,
    target: torch.Tensor,
    layer: nn.Module
) -> torch.Tensor:
    """Compute a Grad-CAM saliency map (https://arxiv.org/abs/1610.02391).

    Runs ``model(input)``, back-propagates ``target`` through the output
    and combines the activations of ``layer`` with their spatially
    averaged gradients.

    Args:
        model: Network to run. Its parameters' ``requires_grad`` flags are
            switched off for the duration of the call and restored
            afterwards, even if the forward or backward pass raises.
        input: Batch passed to ``model``; its trailing dimensions
            (``input.shape[2:]``) define the size of the returned map.
        target: Gradient passed to ``output.backward``.
        layer: Sub-module of ``model`` whose output is hooked. The output
            is reduced over dims 2 and 3, so it is treated as 4-D.

    Returns:
        The clamped, bicubically resized map with a single channel
        (dim 1) and the spatial size of ``input``. It is not detached.

    Raises:
        TypeError: If ``layer`` is not an ``nn.Module``.
    """
    # Validate before touching any state so that a bad argument cannot
    # leave the model's parameters frozen.
    if not isinstance(layer, nn.Module):
        raise TypeError(
            f"layer must be an nn.Module, got {type(layer).__name__}")

    # Zero out any gradients at the input.
    if input.grad is not None:
        input.grad.data.zero_()

    # Remember, then disable, the gradient settings of every parameter.
    requires_grad = {}
    for name, param in model.named_parameters():
        requires_grad[name] = param.requires_grad
        param.requires_grad_(False)

    try:
        # Attach a hook to the model at the desired layer.
        with Hook(layer) as hook:
            # Do a forward and backward pass.
            output = model(input)
            output.backward(target)

            grad = hook.gradient.float()
            act = hook.activation.float()
    finally:
        # Restore gradient settings, also when the passes above failed.
        for name, param in model.named_parameters():
            param.requires_grad_(requires_grad[name])

    # Global average pool gradient across spatial dimension
    # to obtain importance weights.
    alpha = grad.mean(dim=(2, 3), keepdim=True)
    # Weighted combination of activation maps over channel
    # dimension.
    gradcam = torch.sum(act * alpha, dim=1, keepdim=True)
    # We only want neurons with positive influence so we
    # clamp any negative ones.
    gradcam = torch.clamp(gradcam, min=0)

    # Resize gradcam to input resolution.
    gradcam = F.interpolate(
        gradcam,
        input.shape[2:],
        mode='bicubic',
        align_corners=False)

    return gradcam
85
+
86
+
87
+ # Modified from: https://github.com/salesforce/ALBEF/blob/main/visualization.ipynb
88
def getAttMap(img, attn_map, size=(256, 256), alpha=0.4):
    """Overlay a jet-coloured heat map of ``attn_map`` on ``img``.

    Args:
        img: PIL image to draw on. It is converted to RGB so it can be
            blended with the RGB heat map whatever its original mode.
        attn_map: 2-D NumPy array of saliency scores; it is min-max
            normalised before colour mapping.
        size: ``(width, height)`` both images are resized to before
            blending. Defaults to the previously hard-coded 256x256.
        alpha: Blend weight of the heat map passed to ``Image.blend``.
            Defaults to the previously hard-coded 0.4.

    Returns:
        An RGB PIL image of ``size`` with the heat map blended in.
    """
    # Normalize attention map; a constant map stays all zeros instead of
    # being divided by zero.
    attn_map = attn_map - attn_map.min()
    peak = attn_map.max()
    if peak > 0:
        attn_map = attn_map / peak

    # The colormap appends a colour axis; keep only the first three
    # channels (dropping alpha) as 8-bit values.
    heat = matplotlib.cm.jet(attn_map)
    heat = (heat * 255).astype(np.uint8)[:, :, :3]
    img_heatmap = Image.fromarray(heat).resize(size)

    # Image.blend needs both images to share mode and size.
    base = img.convert("RGB").resize(size)
    return Image.blend(base, img_heatmap, alpha)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio>=2.9.0,<2.10.0
2
+ torch>=1.10.0,<1.11.0
3
+ git+https://github.com/openai/CLIP.git
4
+ Pillow
5
+ matplotlib
6
+ numpy