kastan committed on
Commit
b85aa9e
·
1 Parent(s): 36cfe46

add custom dependencies

Browse files
Files changed (3) hide show
  1. clip_for_ppts.py +163 -0
  2. gpu_memory_utils.py +57 -0
  3. requirements.txt +4 -1
clip_for_ppts.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import torch
4
+ import clip
5
+ from PIL import Image
6
+ from pptx import Presentation
7
+ from pptx.enum.shapes import MSO_SHAPE_TYPE
8
+ import time
9
+
10
+
11
+
12
class ClipImage:
    """
    Search lecture-slide images with CLIP, using either a text query or an image query.

    Slides are read from ``<path_of_ppt_folders>/<deck folder>/<slide image>``. The normalized
    image embeddings of each deck are computed the first time the deck is searched and cached in
    ``<path_to_save_image_features>/input_features/slides_<deck folder>_tensor.pt``.
    """

    # Sub-directory of `path_to_save_image_features` that holds the cached per-deck embeddings.
    FEATURES_DIRNAME = 'input_features'

    def __init__(self, path_of_ppt_folders, path_to_save_image_features, mode='image', device='cuda'):
        """
        :param path_of_ppt_folders: path of the folder containing all the ppt folders
        :param path_to_save_image_features: existing directory in which the image features are cached
        :param mode: 'image' or 'text' based on the type of input
        :param device: device to run the model on
        :raises FileNotFoundError: if `path_to_save_image_features` is missing or not a directory
        :raises TypeError: if `path_of_ppt_folders` is not a directory
        """
        # Validate both paths before creating anything on disk or loading the model.
        if not os.path.isdir(path_to_save_image_features):
            raise FileNotFoundError(
                f"{path_to_save_image_features} is not a directory or doesn't exist.")
        if not os.path.isdir(path_of_ppt_folders):
            raise TypeError(
                f"{path_of_ppt_folders} is not a directory. Please only enter a directory")

        features_dir = os.path.join(path_to_save_image_features, self.FEATURES_DIRNAME)
        if not os.path.exists(features_dir):
            os.makedirs(features_dir, exist_ok=True)
            print(f"Directory '{self.FEATURES_DIRNAME}' created")

        # Results of the most recent search: (folder name, slide file name, score) tuples.
        self.res = []
        self.mode = mode
        self.path_of_ppt_folders = path_of_ppt_folders
        self.path_to_save_image_features = path_to_save_image_features
        self.device = device

        # consider ViT-L/14 should be the best one
        self.model, self.preprocess = clip.load('ViT-B/32', self.device)
        # NOTE: slide embeddings are NOT computed here; the first search over an un-cached deck
        # pays the one-time encoding cost.

    def text_to_image_search(self, search_text: str, top_k_to_return: int = 4):
        """
        Return the `top_k_to_return` slides that best match `search_text`.

        Switches the instance to 'text' mode so __init__ does not have to be called again.
        """
        # Kept as an assertion so callers still see AssertionError for a non-string query.
        assert isinstance(search_text, str), f"Must provide a single string, instead I got type {type(search_text)}"
        self.mode = 'text'
        return self.calculate_similarity(search_text, top_k_to_return)

    # TODO: WIP.
    def image_to_images_search(self, input_image, top_k_to_return: int = 4):
        """
        Return the `top_k_to_return` slides that best match `input_image`.

        `input_image` is handed to the CLIP preprocess transform as-is (presumably a PIL image
        coming from gradio -- confirm with the caller). Switches the instance to 'image' mode.
        """
        self.mode = 'image'
        return self.calculate_similarity(input_image, top_k_to_return)

    def create_input_features(self, input_text_or_img):
        """
        Encode the query with CLIP according to `self.mode` and L2-normalize the embedding.

        :param input_text_or_img: the image (mode 'image') or the text (mode 'text') to search for
        :return: the normalized embedding returned by the model's encoder for a batch of one
        :raises ValueError: if `self.mode` is neither 'image' nor 'text'
        """
        if self.mode == 'image':
            input_arr = self.preprocess(input_text_or_img).unsqueeze(0).to(self.device)
            encode = self.model.encode_image
        elif self.mode == 'text':
            input_arr = clip.tokenize(f"{input_text_or_img}").to(self.device)
            encode = self.model.encode_text
        else:
            raise ValueError(f"mode must be 'image' or 'text', got {self.mode!r}")

        with torch.no_grad():
            input_features = encode(input_arr)
            input_features /= input_features.norm(dim=-1, keepdim=True)
        return input_features

    def new_most_similar_slide_file(self, top_k: int):
        """Return the `top_k` highest-scoring entries of `self.res`, best match first."""
        ranked = sorted(self.res, key=lambda x: x[2], reverse=True)
        return ranked[:top_k]

    def _features_path(self, folder_name):
        """Return the cache file path holding the image features of `folder_name`."""
        return os.path.join(self.path_to_save_image_features, self.FEATURES_DIRNAME,
                            f"slides_{folder_name}_tensor.pt")

    def _load_or_encode_folder(self, folder_name, image_names):
        """Return one normalized embedding row per entry of `image_names`, using the cache when valid."""
        cache_path = self._features_path(folder_name)
        if os.path.exists(cache_path):
            # NOTE(security): torch.load unpickles the file; only caches written by this class
            # should ever live in the features directory.
            cached = torch.load(cache_path, map_location=self.device)
            if cached.shape[0] == len(image_names):
                return cached
            # The deck changed since it was cached; using the stale tensor would mislabel slides.
            print(f"Cached image features at {cache_path} do not match the "
                  f"{len(image_names)} images in '{folder_name}'; re-encoding.")

        batch = []
        for image_name in image_names:
            image_path = os.path.join(self.path_of_ppt_folders, folder_name, image_name)
            # Close each file as soon as it has been turned into a tensor.
            with Image.open(image_path) as img:
                batch.append(self.preprocess(img).unsqueeze(0))

        with torch.no_grad():
            image_input = torch.cat(batch).to(self.device)
            image_features = self.model.encode_image(image_input)
            image_features /= image_features.norm(dim=-1, keepdim=True)
        torch.save(image_features, cache_path)
        print("Saved the image features (for faster future loading) to: ", cache_path)
        return image_features

    def calculate_similarity(self, input_text_or_img, topk_val: int = 4):
        """
        Score every slide of every deck against the query and return the best matches.

        :param input_text_or_img: the query, interpreted according to `self.mode`
        :param topk_val: number of results to return
        :return: list of (folder name, slide file name, score) tuples, best match first; the score
            is a 0-dim tensor taken from ONE softmax over all slides of all decks, so scores from
            different decks are directly comparable. Empty list when there are no slides.
        """
        self.res = []
        input_features = self.create_input_features(input_text_or_img)

        labels = []  # (folder name, slide file name), in the same order as the logits columns
        logits = []  # one (1, number of slides) tensor per deck
        for folder_name in os.listdir(self.path_of_ppt_folders):
            folder_path = os.path.join(self.path_of_ppt_folders, folder_name)
            if not os.path.isdir(folder_path):
                # Stray files next to the deck folders are not decks.
                continue
            # NOTE: the listing order pairs file names with cached rows, so it is left unsorted
            # to stay compatible with caches written by earlier runs.
            image_names = os.listdir(folder_path)
            if not image_names:
                continue

            image_features = self._load_or_encode_folder(folder_name, image_names)
            # Scaled cosine similarity (both sides are L2-normalized).
            logits.append(100.0 * input_features @ image_features.T)
            labels.extend((folder_name, image_name) for image_name in image_names)

        if not logits:
            return []

        scores = torch.cat(logits, dim=-1).softmax(dim=-1)[0]
        self.res = [(folder_name, image_name, score)
                    for (folder_name, image_name), score in zip(labels, scores)]

        # Return the sorted results
        return self.new_most_similar_slide_file(topk_val)
155
+
156
+ # if __name__ == "__main__":
157
+
158
+ # demo = ClipImage('/home/rsalvi/chatbotai/rohan/ai-teaching-assistant-uiuc/lecture_slides','/home/rsalvi/chatbotai/rohan/ai-teaching-assistant-uiuc')
159
+ # #op = demo.image_to_images_search('/home/rsalvi/chatbotai/rohan/ai-teaching-assistant-uiuc/lecture_slides/01c/Slide5.jpeg')
160
+ # op = demo.text_to_image_search("Unsigned Bit Pattern")
161
+ # print(op)
162
+ # op = demo.text_to_image_search("Graycode")
163
+ # print(op)
gpu_memory_utils.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import GPUtil # pip install gputil
2
+
3
+
4
def get_gpu_ids_with_sufficient_memory(memory_requirement_GB):
    '''
    Returns the MINIMAL SET of GPU IDs that, combined, have at least `memory_requirement_GB` GB of free memory.
    You will need to use all returned GPU IDs to get the desired memory requirement.

    GPUs are picked greedily, most free memory first, and the IDs are returned in that order
    (GPUs with equal free memory keep the order GPUtil reported them in).

    If `memory_requirement_GB` is 0 (or negative), returns all available GPUs.
    If `memory_requirement_GB` is not available, returns an empty list.
    '''
    # The requirement is converted with a factor of 1024 to be compared against GPUtil's
    # `memoryFree` (presumably MB as reported by nvidia-smi -- confirm against GPUtil docs).
    memory_requirement_MB = float(memory_requirement_GB * 1024)
    GPUs = sorted(GPUtil.getGPUs(), key=lambda x: x.memoryFree, reverse=True)

    # Documented contract: no requirement means "every GPU is acceptable".
    if memory_requirement_MB <= 0:
        return [gpu.id for gpu in GPUs]

    if memory_requirement_MB > sum(gpu.memoryFree for gpu in GPUs):
        return []

    GPU_IDs = []
    remaining_MB = memory_requirement_MB
    for gpu in GPUs:
        if remaining_MB <= 0:
            break
        GPU_IDs.append(gpu.id)
        remaining_MB -= gpu.memoryFree
    return GPU_IDs
25
+
26
+
27
def get_device_with_most_free_memory():
    '''
    Returns the GPU ID of the GPU with the most free memory.

    When several GPUs tie, the first one reported by GPUtil wins.
    Raises IndexError if GPUtil reports no GPUs (same exception type as before, with a clear message).
    '''
    GPUs = GPUtil.getGPUs()
    if not GPUs:
        raise IndexError("GPUtil reported no GPUs; cannot pick the device with the most free memory.")
    # max() returns the first maximal element, so no full sort is needed.
    return max(GPUs, key=lambda x: x.memoryFree).id
33
+
34
+
35
def get_free_memory_dict(leave_extra_memory_unused_GiB: float = 2, leave_extra_memory_unused_gpu0_GiB: float = 3):
    '''
    Returns a dictionary of GPU IDs and their free memory, in MiB.
    Compatible with huggingface Accelerate formatting: `max_memory=get_free_memory_dict()`

    Accelerate seems to use more memory than we give it, so we default to telling Accelerate we have 2 GiB less than we actually do.
    GPU 0 is additionally reduced by `leave_extra_memory_unused_gpu0_GiB`, i.e. by 5 GiB in total with the defaults.
    A reserve that is 0 or negative is ignored. Budgets never go below '0MiB', even when a GPU has
    less free memory than the requested reserve.

    Example output:
    {0: '24753MiB', 1: '26223MiB', 2: '25603MiB', 3: '9044MiB'}
    '''
    memory_map = {}
    for gpu in GPUtil.getGPUs():
        usable_MiB = int(round(gpu.memoryFree))
        if leave_extra_memory_unused_GiB > 0:
            usable_MiB -= leave_extra_memory_unused_GiB * 1024
        if gpu.id == 0 and leave_extra_memory_unused_gpu0_GiB > 0:
            usable_MiB -= leave_extra_memory_unused_gpu0_GiB * 1024

        # format to Accelerate's liking; clamp so a nearly-full GPU yields '0MiB', not a negative size
        memory_map[gpu.id] = f"{max(0, int(round(usable_MiB)))}MiB"

    return memory_map
requirements.txt CHANGED
@@ -4,4 +4,7 @@ pinecone-client
4
  sentence-transformers
5
  pandas
6
  langchain
7
- python-dotenv
 
 
 
 
4
  sentence-transformers
5
  pandas
6
  langchain
7
+ gputil
8
+ clip
9
+ torch
10
+ transformers