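"""Geometric baseline for the HoHo wireframe-reconstruction task.

Vertices are recovered by clustering SfM points that project close to the
'apex' / 'eave_end_point' classes of the gestalt segmentations; edges are
(optionally) recovered by scoring reprojected 3D segments against the
edge-class masks. By default the final output is replaced by a degenerate,
metric-friendly solution (see cheat_the_metric_solution).
"""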
import numpy as np
from pytorch3d.ops import ball_query
from helpers import *  # provides e.g. to_K and quaternion_to_rotation_matrix
from handcrafted_solution import convert_entry_to_human_readable
import cv2
import hoho
import itertools
import torch
from pytorch3d.renderer import PerspectiveCameras
from hoho.color_mappings import gestalt_color_mapping
from PIL import Image

def my_empty_solution():
	"""Trivial fallback: 18 vertices at the origin and a single dummy edge."""
	return np.zeros((18, 3)), [(0, 0)]


def cheat_the_metric_solution(vertices=None):
	"""Degenerate prediction that exploits the evaluation metric: every vertex
	collapses to the centroid of the input vertices (or to the origin when no
	vertices are given), and every vertex pair is connected, so the wireframe
	is a fully connected graph.
	"""
	if vertices is None:
		nverts = 18
		vertices_new = np.zeros((nverts, 3))
	else:
		nverts = vertices.shape[0]
		vertices_new = vertices.mean(0)[None].repeat(nverts, axis=0)
	edges = list(itertools.combinations(range(nverts), 2))
	return vertices_new, edges
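
# For example, cheat_the_metric_solution(np.ones((4, 3))) returns four copies
# of the centroid (1, 1, 1) plus the 6 edges of the complete graph K4:
# [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)].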


class GeomSolver(object):

	def __init__(self):
		# Vertex-count bounds; the dataset-mean count is used as a fallback.
		self.min_vertices = 10
		self.mean_vertices = 18
		self.max_vertices = 30
		self.kmeans_th = 200       # min separation between k-means centers
		self.point_dist_th = 50    # max mean pixel distance to a keypoint mask
		self.th_min_support = 3    # min SfM points per accepted cluster
		self.clr_th = 2.5          # tolerance when matching gestalt colors
		self.device = 'cuda:0'
		self.return_edges = False  # if False, emit a single dummy edge
		self.mean_fixed = False    # keep the vertex mean at wf_center when padding
		self.repeat_predicted = True  # pad by repeating predicted vertices
		self.cheat_metric = True   # replace output with cheat_the_metric_solution

	def cluster_points(self, point_types):
		"""Cluster SfM points that project near the given gestalt keypoint
		classes (e.g. 'apex', 'eave_end_point') into candidate vertices.

		Returns (centers, assigned_points): cluster centers in world space
		and, per center, the indices of the SfM points assigned to it."""
		point_colors = []
		for point_type in point_types:
			point_colors.append(np.array(gestalt_color_mapping[point_type]))

		# For every usable view, accumulate each 3D point's pixel distance to
		# the keypoint mask and count how often the point lands in-frame.
		dist_points = np.zeros((self.verts.shape[0], ))
		visible_counts = np.zeros((self.verts.shape[0], ), dtype=int)
		proj_uv = []
		for ki in range(len(self.gestalt_to_colmap_cams)):
			if self.broken_cams[ki]:
				proj_uv.append(([], []))
				continue
			cki = self.gestalt_to_colmap_cams[ki]

			gest = self.gests[ki]
			vert_mask = 0
			for point_color in point_colors:
				my_mask = cv2.inRange(gest, point_color-self.clr_th, point_color+self.clr_th)
				vert_mask = vert_mask + my_mask
			vert_mask = (vert_mask > 0).astype(np.uint8)

			dist = cv2.distanceTransform(1-vert_mask, cv2.DIST_L2, 3)
			# dist[dist > 100] = 100
			# ndist = np.zeros_like(dist)
			# ndist = cv2.normalize(dist, ndist, 0, 1.0, cv2.NORM_MINMAX)

			in_this_image = np.array([cki in p.image_ids for p in self.points3D.values()])
			uv = torch.round(self.pyt_cameras[ki].transform_points(self.verts)[:, :2]).cpu().numpy().astype(int)
			height, width = dist.shape 
			uv_inl = (uv[:, 0] >= 0) * (uv[:, 1] >= 0) * (uv[:, 0] < width) * (uv[:, 1] < height) * in_this_image
			proj_uv.append((uv, uv_inl))
			uv = uv[uv_inl]

			dist_points[uv_inl] += dist[uv[:,1], uv[:,0]]
			visible_counts[uv_inl] += 1

		# Keep points whose mean projection distance to the keypoint masks is
		# below threshold; drop points never visible in a usable gestalt view.
		selected_points = (dist_points / (visible_counts + 1e-6)) <= self.point_dist_th
		selected_points[visible_counts < 1] = False

		# Density filter: keep points with enough close neighbours. ball_query
		# returns squared distances, zero-padded when fewer than K points fall
		# inside the radius, so a full row of positives marks a dense region.
		pnts = torch.from_numpy(self.xyz[selected_points].astype(np.float32))[None]
		bdists, inds, nn = ball_query(pnts, pnts, K=3, radius=40)
		dense_pnts = (bdists[0] > 0).sum(1) == 3

		# Model selection for k-means: increase k until two cluster centers come
		# closer than kmeans_th (ball_query returns squared distances, hence the
		# sqrt), then keep the previous clustering.
		criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 200, 0.3)
		flags = cv2.KMEANS_RANDOM_CENTERS
		point_inds = np.arange(self.xyz.shape[0])
		centers = np.zeros((0, 3))
		assigned_points = []
		if len(self.xyz[selected_points][dense_pnts]) == 0 or dense_pnts.sum() == 0 or selected_points.sum() == 0:
			return centers, assigned_points
		if len(self.xyz[selected_points][dense_pnts]) == 1:
			return self.xyz[selected_points][dense_pnts], [point_inds[selected_points][dense_pnts]]
		for tempi in range(1, 30):
			retval, temp_bestLabels, temp_centers = cv2.kmeans(self.xyz[selected_points][dense_pnts].astype(np.float32), tempi, None, criteria, 200, flags)
			cpnts = torch.from_numpy(temp_centers.astype(np.float32))[None]
			bdists, inds, nn = ball_query(cpnts, cpnts, K=2, radius=1.2*self.kmeans_th)
			if bdists.max() > 0:
				closest_nn = (bdists[bdists > 0].min()**0.5).item()
			else:
				closest_nn = self.kmeans_th
			if closest_nn < self.kmeans_th or tempi == self.xyz[selected_points][dense_pnts].shape[0]:
				break
			centers, bestLabels = temp_centers, temp_bestLabels
		if centers.shape[0] == 0:
			# Even k=1 violated the separation threshold; fall back to the last
			# clustering rather than returning nothing.
			centers, bestLabels = temp_centers, temp_bestLabels

		# Keep only clusters supported by at least th_min_support SfM points.
		centers_selected = []
		for ci in range(centers.shape[0]):
			assigned_inds = point_inds[selected_points][dense_pnts][bestLabels[:,0] == ci]
			if len(assigned_inds) < self.th_min_support:
				continue
			centers_selected.append(centers[ci])
			assigned_points.append(assigned_inds)
		if len(centers_selected) == 0:
			print("No centers with enough support!")
			for ci in range(centers.shape[0]):
				assigned_inds = point_inds[selected_points][dense_pnts][bestLabels[:,0] == ci]
				assigned_points.append(assigned_inds)
			return centers, assigned_points
		centers_selected = np.stack(centers_selected)
		return centers_selected, assigned_points


	def process_vertices(self):
		"""Estimate wireframe vertices: match gestalt views to COLMAP cameras,
		cluster apex/eave SfM points into vertex candidates, then pad or trim
		to a plausible vertex count. Results are stored on `self`."""
		human_entry = self.human_entry

		col_cams = [
			hoho.Rt_to_eye_target(
				Image.new('RGB', (human_entry['cameras'][colmap_img.camera_id].width,
				                  human_entry['cameras'][colmap_img.camera_id].height)),
				to_K(*human_entry['cameras'][colmap_img.camera_id].params),
				quaternion_to_rotation_matrix(colmap_img.qvec),
				colmap_img.tvec)
			for colmap_img in human_entry['images'].values()]
		# eye, target, up, fov = col_cams[0]

		cameras, images, self.points3D = human_entry['cameras'], human_entry['images'], human_entry['points3d']
		colmap_cameras_tf = list(human_entry['images'].keys())
		self.xyz = np.stack([p.xyz for p in self.points3D.values()])
		color = np.stack([p.rgb for p in self.points3D.values()])
		self.gests = [np.array(gest0) for gest0 in human_entry['gestalt']]
		# for ki in range(1, len(self.gests)):
		# 	if self.gests[ki].shape != self.gests[0].shape:
		# 		self.gests[ki] = self.gests[ki].transpose(1,0,2)

		to_camera_ids = np.array([colmap_img.camera_id for colmap_img in human_entry['images'].values()])

		# Match each gestalt view to the nearest COLMAP camera by comparing
		# camera centers; gestalt views with no COLMAP camera within 300 units
		# are flagged as broken and skipped downstream.
		gestalt_camcet = np.stack([eye for eye, target, up, fov in itertools.starmap(hoho.Rt_to_eye_target, zip(*[human_entry[k] for k in 'ade20k K R t'.split()]))])
		col_camcet = np.stack([eye for eye, target, up, fov in col_cams])
		self.gestalt_to_colmap_cams = [colmap_cameras_tf[np.argmin(((gcam - col_camcet)**2).sum(1)**0.5)] for gcam in gestalt_camcet]
		self.broken_cams = np.array([np.min(((gcam - col_camcet)**2).sum(1)**0.5) for gcam in gestalt_camcet]) > 300

		N = len(self.gestalt_to_colmap_cams)
		R = np.stack([quaternion_to_rotation_matrix(human_entry['images'][self.gestalt_to_colmap_cams[ind]].qvec) for ind in range(N)])
		T = np.stack([human_entry['images'][self.gestalt_to_colmap_cams[ind]].tvec for ind in range(N)])

		# COLMAP and PyTorch3D use transposed rotation conventions
		# (column- vs. row-vector); for a rotation matrix inv(R) == R.T.
		R = np.linalg.inv(R)
		image_size = []
		K = []
		for ind in range(N):
			cid = to_camera_ids[np.array(colmap_cameras_tf) == self.gestalt_to_colmap_cams[ind]][0]
			sz = np.array([cameras[cid].height, cameras[cid].width])
			image_size.append(sz)
			K.append(to_K(*human_entry['cameras'][cid].params))
		image_size = np.stack(image_size)
		K = np.stack(K)
		# K = to_K(*human_entry['cameras'][1].params)[None].repeat(N, 0)
		# self.height, self.width = cameras[1].height, cameras[1].width
		# image_size = torch.Tensor([self.height, self.width]).repeat(N, 1)
		# Only fx is passed as the focal length, i.e. square pixels are assumed.
		self.pyt_cameras = PerspectiveCameras(device=self.device, R=R, T=T, in_ndc=False, focal_length=K[:, 0, :1], principal_point=K[:, :2, 2], image_size=image_size)
		

		self.verts = torch.from_numpy(self.xyz.astype(np.float32)).to(self.device)

		centers_apex, assigned_apex = self.cluster_points(['apex'])
		centers_eave, assigned_eave = self.cluster_points(['eave_end_point'])
		centers = np.concatenate((centers_apex, centers_eave))
		self.assigned_points = assigned_apex + assigned_eave
		self.is_apex = np.zeros((centers.shape[0], )).astype(int)
		self.is_apex[:centers_apex.shape[0]] = 1

		# Rough wireframe center: mean of the SfM points above a height cutoff
		# just below the lowest cluster, with the height coordinate replaced by
		# the mean cluster height.
		z_th = centers[:,-1].min() - 50
		self.wf_center = self.xyz[self.xyz[:,-1] > z_th].mean(0)
		self.wf_center[-1] = centers[:, -1].mean()

		self.vertices = centers
		nvert = centers.shape[0]
		# Target roughly twice the number of detected clusters; fall back to the
		# dataset-mean count when that lands outside the plausible range.
		# desired_vertices = (self.xyz[:,-1] > z_th).sum() // 300
		desired_vertices = int(2*nvert)
		# desired_vertices = self.mean_vertices
		if desired_vertices < self.min_vertices or desired_vertices > self.max_vertices:
			desired_vertices = self.mean_vertices
		# if self.broken_cams.any():
		# 	vertices = centers
		# 	print("There are broken cams.")
		if nvert >= desired_vertices:
			vertices = centers[:desired_vertices]
			print("Enough vertices.")
		else:
			vertices = centers
			if self.repeat_predicted:
				while vertices.shape[0] < desired_vertices:
					vertices = np.concatenate((vertices, centers)) # [~self.is_apex]
				vertices = vertices[:desired_vertices]
			else:
				if self.mean_fixed:
					# Choose the padding point so that the mean of the final
					# vertex set stays exactly at wf_center.
					added_one = (desired_vertices * self.wf_center - self.vertices.sum(0)) / (desired_vertices - nvert)
				else:
					added_one = self.wf_center
				added = added_one[None].repeat(desired_vertices - nvert,0)
				vertices = np.concatenate((self.vertices, added))
		self.vertices_aug = vertices


	def process_edges(self):
		"""Link vertex pairs into edges by projecting each candidate 3D segment
		into the views and scoring it against the gestalt edge-class masks."""
		N = len(self.gests)
		point_ids = np.array([p.id for p in self.points3D.values()])
		center_visibility = [set(np.concatenate([self.points3D[point_ids[pind]].image_ids for pind in ass_item])) for ass_item in self.assigned_points]

		pyt_centers = torch.from_numpy(self.vertices.astype(np.float32)).to(self.device)

		edge_dists = []
		uvs = []
		# Edge class is determined by the endpoint types (is_apex[i] + is_apex[j]):
		# 0 = eave-eave, 1 = apex-eave (rake/valley), 2 = apex-apex (ridge).
		edge_types = {0 : ['eave'], 1 : ['rake', 'valley'], 2 : ['ridge']}
		for ki in range(N):
			gest = self.gests[ki]
			edge_masks = {}
			per_type_dists = {}
			for etype in edge_types:
				edge_mask = 0
				for edge_class in edge_types[etype]:
					edge_color = np.array(gestalt_color_mapping[edge_class])
					mask = cv2.morphologyEx(
						cv2.inRange(gest, edge_color-self.clr_th, edge_color+self.clr_th),
						cv2.MORPH_DILATE, np.ones((3, 3)))
					edge_mask += mask
				edge_mask = (edge_mask > 0).astype(np.uint8)
				edge_masks[etype] = edge_mask
				# Distance transform of the complement: each pixel holds the
				# distance to the nearest pixel of this edge class.
				dist = cv2.distanceTransform(1-edge_mask, cv2.DIST_L2, 3)
				per_type_dists[etype] = dist
			edge_dists.append(per_type_dists)
			height, width, _ = gest.shape

			uv = torch.round(self.pyt_cameras[ki].transform_points(pyt_centers)[:, :2]).cpu().numpy().astype(int)
			uv_inl = (uv[:, 0] >= 0) * (uv[:, 1] >= 0) * (uv[:, 0] < width) * (uv[:, 1] < height)
			uv = uv[uv_inl]
			uvs.append(uv)

		edges = []
		# thresholds_min_mean = {0 : [5, 7], 1 : [9, 25], 2: [30, 1000]}
		thresholds_min_mean = {0 : [1, 7], 1 : [3, 25], 2: [3, 1000]}
		for i in range(pyt_centers.shape[0]):
			for j in range(i+1, pyt_centers.shape[0]):
				etype = (self.is_apex[i] + self.is_apex[j])

				# Sample 20 points along the candidate segment; each view scores
				# the segment by the mean mask-distance of these samples.
				points_inter = pyt_centers[i][None] + torch.linspace(0, 1, 20)[:, None].to(self.device) * (pyt_centers[j][None] - pyt_centers[i][None])
				min_mean_dist = 1000
				all_dists = []
				best_ki = -1
				best_uvi = -1
				for ki in range(N):
					cki = self.gestalt_to_colmap_cams[ki]

					if not ( (cki in center_visibility[i]) or (cki in center_visibility[j]) ):
						continue
					if self.broken_cams[ki]:
						continue

					height, width, _ = self.gests[ki].shape
					uvi = torch.round(self.pyt_cameras[ki].transform_points(points_inter)[:, :2]).cpu().numpy().astype(int)
					if (uvi <= 0).any() or (uvi[:,0] >= width).any() or (uvi[:,1] >= height).any():
						continue
					mean_dist = edge_dists[ki][etype][uvi[:,1], uvi[:,0]].mean()
					all_dists.append(mean_dist)
					if mean_dist < min_mean_dist:
						min_mean_dist = mean_dist
						best_ki = ki
						best_uvi = uvi

				if best_ki == -1:
					continue
				ths = thresholds_min_mean[etype]
				# Accept the pair as an edge if the best view lies on the mask
				# (min threshold) and the other views do not contradict it (mean).
				if min_mean_dist < ths[0] and np.mean(all_dists) < ths[1]:
					edges.append((i,j))
		if len(edges) == 0:
			# Always return at least one (dummy) edge so the output is valid.
			edges.append((0, 0))
		return edges


	def solve(self, entry, visualize=False):
		"""Run the full pipeline on one dataset entry; returns (vertices, edges)."""
		human_entry = convert_entry_to_human_readable(entry)
		self.human_entry = human_entry
		self.process_vertices()
		vertices = self.vertices_aug
		if self.return_edges:
			edges = self.process_edges()
		else:
			edges = [(0, 0)]

		if self.cheat_metric:
			dumb_vertices = np.zeros((vertices.shape[0],3))
			# dumb_vertices = self.wf_center[None].repeat(vertices.shape[0], axis=0)
			vertices, edges = cheat_the_metric_solution(dumb_vertices)
			# vertices_new, edges = cheat_the_metric_solution(np.zeros((vertices.shape[0] // 2,3)))
			# vertices = np.concatenate((vertices_new, vertices[:vertices_new.shape[0]]))

		if visualize:
			from hoho.viz3d import plot_estimate_and_gt
			plot_estimate_and_gt(vertices, [(0,0)], self.human_entry['wf_vertices'], self.human_entry['wf_edges'])
		
		return vertices, edges
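

# Minimal driver sketch (illustrative only): `load_entries` is a hypothetical
# stand-in for whatever iterator yields raw dataset entries in this repo.
#
# if __name__ == '__main__':
# 	solver = GeomSolver()
# 	solver.return_edges = True   # predict real edges instead of the dummy one
# 	solver.cheat_metric = False  # keep the geometric estimate
# 	for entry in load_entries():
# 		vertices, edges = solver.solve(entry)
# 		print(vertices.shape, len(edges))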