Denys Rozumnyi committed
Commit · ca70147
Parent(s): 5ee3f67

update

Browse files:
- geom_solver.py +59 -42
- testing.ipynb +44 -24
geom_solver.py
CHANGED
@@ -8,6 +8,7 @@ import itertools
 import torch
 from pytorch3d.renderer import PerspectiveCameras
 from hoho.color_mappings import gestalt_color_mapping
+from PIL import Image
 
 def my_empty_solution():
     return np.zeros((18,3)), [(0, 0)]
@@ -56,7 +57,8 @@ class GeomSolver(object):
 
             in_this_image = np.array([cki in p.image_ids for p in self.points3D.values()])
             uv = torch.round(self.pyt_cameras[ki].transform_points(self.verts)[:, :2]).cpu().numpy().astype(int)
-            uv_inl = (uv[:, 0] >= 0) * (uv[:, 1] >= 0) * (uv[:, 0] < self.width) * (uv[:, 1] < self.height) * in_this_image
+            height, width = dist.shape
+            uv_inl = (uv[:, 0] >= 0) * (uv[:, 1] >= 0) * (uv[:, 0] < width) * (uv[:, 1] < height) * in_this_image
             proj_uv.append((uv, uv_inl))
             uv = uv[uv_inl]
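The replacement bounds check above derives the valid pixel range from the per-image distance map rather than a single global width/height. A minimal self-contained sketch of that masking pattern, assuming made-up data (`dist` and `uv` mirror the names in the diff; the values here are illustrative only):

import numpy as np

# Illustrative stand-ins: a 4x6 per-image map and a few projected (x, y) pixel coords.
dist = np.zeros((4, 6))                           # per-image map, shape (H, W)
uv = np.array([[1, 2], [5, 3], [6, 3], [-1, 0]])  # projected points as (x, y)

height, width = dist.shape                        # per-image size, as in the new code
uv_inl = (uv[:, 0] >= 0) & (uv[:, 1] >= 0) & (uv[:, 0] < width) & (uv[:, 1] < height)
print(uv[uv_inl])                                 # keeps [1, 2] and [5, 3]; drops out-of-bounds points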
@@ -113,32 +115,45 @@ class GeomSolver(object):
     def process_vertices(self):
         human_entry = self.human_entry
 
-        col_cams = [hoho.Rt_to_eye_target(human_entry['…
-        eye, target, up, fov = col_cams[0]
+        col_cams = [hoho.Rt_to_eye_target(Image.new('RGB', (human_entry['cameras'][colmap_img.camera_id].width, human_entry['cameras'][colmap_img.camera_id].height)), to_K(*human_entry['cameras'][colmap_img.camera_id].params), quaternion_to_rotation_matrix(colmap_img.qvec), colmap_img.tvec) for colmap_img in human_entry['images'].values()]
+        # eye, target, up, fov = col_cams[0]
 
         cameras, images, self.points3D = human_entry['cameras'], human_entry['images'], human_entry['points3d']
         colmap_cameras_tf = list(human_entry['images'].keys())
         self.xyz = np.stack([p.xyz for p in self.points3D.values()])
         color = np.stack([p.rgb for p in self.points3D.values()])
         self.gests = [np.array(gest0) for gest0 in human_entry['gestalt']]
-        for ki in range(1, len(self.gests)):
-            if self.gests[ki].shape != self.gests[0].shape:
-                self.gests[ki] = self.gests[ki].transpose(1,0,2)
+        # for ki in range(1, len(self.gests)):
+        #     if self.gests[ki].shape != self.gests[0].shape:
+        #         self.gests[ki] = self.gests[ki].transpose(1,0,2)
+
+        to_camera_ids = np.array([colmap_img.camera_id for colmap_img in human_entry['images'].values()])
 
         gestalt_camcet = np.stack([eye for eye, target, up, fov in itertools.starmap(hoho.Rt_to_eye_target, zip(*[human_entry[k] for k in 'ade20k K R t'.split()]))])
         col_camcet = np.stack([eye for eye, target, up, fov in col_cams])
         self.gestalt_to_colmap_cams = [colmap_cameras_tf[np.argmin(((gcam - col_camcet)**2).sum(1)**0.5)] for gcam in gestalt_camcet]
         self.broken_cams = np.array([np.min(((gcam - col_camcet)**2).sum(1)**0.5) for gcam in gestalt_camcet]) > 300
 
-        self.height, self.width = cameras[1].height, cameras[1].width
         N = len(self.gestalt_to_colmap_cams)
-        K = to_K(*human_entry['cameras'][1].params)[None].repeat(N, 0)
         R = np.stack([quaternion_to_rotation_matrix(human_entry['images'][self.gestalt_to_colmap_cams[ind]].qvec) for ind in range(N)])
         T = np.stack([human_entry['images'][self.gestalt_to_colmap_cams[ind]].tvec for ind in range(N)])
 
         R = np.linalg.inv(R)
-        image_size = torch.Tensor([self.height, self.width]).repeat(N, 1)
+        image_size = []
+        K = []
+        for ind in range(N):
+            cid = to_camera_ids[np.array(colmap_cameras_tf) == self.gestalt_to_colmap_cams[ind]][0]
+            sz = np.array([cameras[cid].height, cameras[cid].width])
+            image_size.append(sz)
+            K.append(to_K(*human_entry['cameras'][cid].params))
+        image_size = np.stack(image_size)
+        K = np.stack(K)
+        # K = to_K(*human_entry['cameras'][1].params)[None].repeat(N, 0)
+        # self.height, self.width = cameras[1].height, cameras[1].width
+        # image_size = torch.Tensor([self.height, self.width]).repeat(N, 1)
         self.pyt_cameras = PerspectiveCameras(device=self.device, R=R, T=T, in_ndc=False, focal_length=K[:, 0, :1], principal_point=K[:, :2, 2], image_size=image_size)
+
+
         self.verts = torch.from_numpy(self.xyz.astype(np.float32)).to(self.device)
 
         centers_apex, assigned_apex = self.cluster_points(['apex'])
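The matching above pairs each gestalt view with the COLMAP camera whose center is nearest in Euclidean distance, and flags a view as broken when no center lies within 300 units. A toy sketch of that lookup, assuming invented coordinates and hypothetical `colmap_ids`:

import numpy as np

gestalt_camcet = np.array([[0.0, 0.0, 10.0], [5.0, 0.0, 10.0]])                 # gestalt camera centers
col_camcet = np.array([[0.1, 0.0, 10.0], [5.2, 0.1, 9.9], [400.0, 0.0, 0.0]])  # COLMAP camera centers
colmap_ids = [7, 12, 31]                                                        # hypothetical COLMAP image ids

dists = np.linalg.norm(gestalt_camcet[:, None] - col_camcet[None], axis=2)      # (n_gestalt, n_colmap)
gestalt_to_colmap = [colmap_ids[j] for j in dists.argmin(1)]                    # nearest COLMAP camera per view
broken = dists.min(1) > 300                                                     # same threshold as the solver
print(gestalt_to_colmap, broken)                                                # [7, 12] [False False]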
@@ -211,9 +226,10 @@ class GeomSolver(object):
             dist = cv2.distanceTransform(1-edge_mask, cv2.DIST_L2, 3)
             per_type_dists[etype] = dist
         edge_dists.append(per_type_dists)
-
+        height, width, _ = gest.shape
+
         uv = torch.round(self.pyt_cameras[ki].transform_points(pyt_centers)[:, :2]).cpu().numpy().astype(int)
-        uv_inl = (uv[:, 0] >= 0) * (uv[:, 1] >= 0) * (uv[:, 0] < self.width) * (uv[:, 1] < self.height)
+        uv_inl = (uv[:, 0] >= 0) * (uv[:, 1] >= 0) * (uv[:, 0] < width) * (uv[:, 1] < height)
        uv = uv[uv_inl]
        uvs.append(uv)
 
@@ -221,37 +237,38 @@ class GeomSolver(object):
         thresholds_min_mean = {0 : [5, 7], 1 : [9, 25], 2: [30, 1000]}
         # thresholds_min_mean = {0 : [1, 7], 1 : [1, 25], 2: [1, 1000]}
         for i in range(pyt_centers.shape[0]):
-            … (old pairwise scoring loop, 31 lines truncated in the diff view)
+            for j in range(i+1, pyt_centers.shape[0]):
+                etype = (self.is_apex[i] + self.is_apex[j])
+
+                points_inter = pyt_centers[i][None] + torch.linspace(0, 1, 20)[:, None].to(self.device) * (pyt_centers[j][None] - pyt_centers[i][None])
+                min_mean_dist = 1000
+                all_dists = []
+                best_ki = -1
+                best_uvi = -1
+                for ki in range(N):
+                    cki = self.gestalt_to_colmap_cams[ki]
+
+                    if not ( (cki in center_visibility[i]) or (cki in center_visibility[j]) ):
+                        continue
+                    if self.broken_cams[ki]:
+                        continue
+
+                    height, width, _ = self.gests[ki].shape
+                    uvi = torch.round(self.pyt_cameras[ki].transform_points(points_inter)[:, :2]).cpu().numpy().astype(int)
+                    if (uvi <= 0).any() or (uvi[:,0] >= width).any() or (uvi[:,1] >= height).any():
+                        continue
+                    mean_dist = edge_dists[ki][etype][uvi[:,1], uvi[:,0]].mean()
+                    all_dists.append(mean_dist)
+                    if mean_dist < min_mean_dist:
+                        min_mean_dist = mean_dist
+                        best_ki = ki
+                        best_uvi = uvi
+
+                if best_ki == -1:
+                    continue
+                ths = thresholds_min_mean[etype]
+                if min_mean_dist < ths[0] and np.mean(all_dists) < ths[1]:
+                    edges.append((i,j))
         if len(edges) == 0:
             edges.append((0, 0))
         return edges
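The rewritten loop above scores a candidate edge by projecting 20 points along the 3D segment into each usable view and averaging a distance transform of the predicted edge mask at those pixels; the edge is kept only if the best view's mean and the overall mean clear per-type thresholds. A compact sketch of that scoring idea on a toy mask (the image size and segment are illustrative, not the solver's):

import numpy as np
import cv2

# Toy edge mask: a horizontal run of edge pixels in a 50x50 image.
edge_mask = np.zeros((50, 50), np.uint8)
edge_mask[25, 10:40] = 1

# Distance to the nearest edge pixel, as in the diff.
dist = cv2.distanceTransform(1 - edge_mask, cv2.DIST_L2, 3)

# Sample 20 points along a candidate segment (y fixed at 25, x from 10 to 39).
xs = np.linspace(10, 39, 20).round().astype(int)
ys = np.full_like(xs, 25)
mean_dist = dist[ys, xs].mean()  # ~0 here: the segment hugs the mask, so it would pass
print(mean_dist)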
testing.ipynb
CHANGED
@@ -176,7 +176,7 @@
    },
    {
     "cell_type": "code",
-    "execution_count": …,
+    "execution_count": 195,
     "id": "88f4fc8f-efa9-404b-9073-c7d4a73f9075",
     "metadata": {},
     "outputs": [
@@ -184,21 +184,38 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "2.… (old metric printout, 15 lines truncated in the diff view)
+      "2.4183324229440974 2.038011551581092\n",
+      "1.9395643500589714 2.0791329825966307\n",
+      "2.815541573372287 2.5366888251094535\n",
+      "2.3254810143936755 1.648047653136293\n",
+      "2.3286533191380765 1.9072141098956248\n",
+      "2.342868026940067 1.9669375395419237\n",
+      "2.04873204164575 1.8261703137595002\n",
+      "2.167980973024546 1.5160824709801253\n",
+      "2.571328055702413 2.020824999345514\n",
+      "2.0127597511603774 2.0591274017651693\n",
+      "1.961080548873729 1.8200081675273143\n",
+      "2.1617889927813057 1.6349180819215263\n",
+      "2.0817899095268078 1.5980454240474442\n",
+      "2.484359575186481 1.9029737257539197\n",
+      "2.2997637652539 1.794924573842486\n",
+      "2.440276048704617 2.5473207175534065\n",
+      "2.135748984421359 1.6773471477819646\n",
+      "2.3955711940075584 2.5549005605155117\n",
+      "2.516982526006783 2.2057496708814113\n",
+      "2.521576698581939 1.615795718071218\n",
+      "1.8495344082304994 1.57835603284867\n",
+      "2.932654420428436 1.8880643234492105\n",
+      "1.9387187366052139 1.6981983307675732\n",
+      "2.135296338517323 1.8054484663488366\n",
+      "2.527322316920579 1.7985373132549314\n",
+      "2.681606928809334 2.2261780476238493\n",
+      "2.4613439446210306 1.6635009702924486\n",
+      "2.3497006297365304 1.815488520909667\n",
+      "2.2946897561795074 1.6075314409425536\n",
+      "2.2292662656029454 2.059612285543195\n",
+      "Averages\n",
+      "2.312343783912538 1.9030379122529486\n"
      ]
     }
    ],
@@ -209,7 +226,7 @@
     "torch.manual_seed(0)\n",
     "# One shard of the dataset\n",
     "dataset = wds.WebDataset(hf_hub_download(repo_id='usm3d/hoho-train-set',\n",
-    "                         filename='data/train/…
+    "                         filename='data/train/hoho_v3_001-of-032.tar.gz',\n",
     "                         repo_type=\"dataset\"))\n",
     "\n",
     "dataset = dataset.decode()\n",
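For context, a minimal sketch of streaming one shard the way the cell above does; the repo id and filename are taken from the notebook itself, and the printed keys are just a peek at the entry structure:

import webdataset as wds
from huggingface_hub import hf_hub_download

# Download (or reuse a cached copy of) one training shard from the Hub.
path = hf_hub_download(repo_id='usm3d/hoho-train-set',
                       filename='data/train/hoho_v3_001-of-032.tar.gz',
                       repo_type='dataset')
dataset = wds.WebDataset(path).decode()
for ki, entry in enumerate(dataset):
    print(ki, sorted(entry.keys())[:5])  # each entry is a dict of decoded fields
    break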
@@ -217,10 +234,10 @@
     "sc0 = []\n",
     "sc = []\n",
     "for ki, entry in enumerate(dataset):\n",
-    "    # if ki < 153: wrong camera ids\n",
-    "…
-    "    # if ki < 162: different cameras and different image sizes\n",
-    "…
+    "    # if ki < 153: # wrong camera ids\n",
+    "    #     continue\n",
+    "    # if ki < 162: # different cameras and different image sizes\n",
+    "    #     continue\n",
     "    solver = GeomSolver()\n",
     "    vertices, edges = solver.solve(entry)\n",
     "    \n",
@@ -258,22 +275,25 @@
    },
    {
     "cell_type": "code",
-    "execution_count": …,
+    "execution_count": 185,
     "id": "a34d0a2b-9986-47cc-8a3f-c15397370c4d",
     "metadata": {},
     "outputs": [
      {
       "data": {
        "text/plain": [
-        "…
+        "0"
        ]
       },
-      "execution_count": …,
+      "execution_count": 185,
       "metadata": {},
       "output_type": "execute_result"
      }
     ],
     "source": [
+     "# human_entry['images'][1]\n",
+     "# # human_entry['cameras'][1].width\n",
+     "# Image.new('RGB', (human_entry['cameras'][1].width, human_entry['cameras'][1].height)).height\n",
      "ki"
     ]
    },
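The stdout block above records two scores per entry and a closing "Averages" line, consistent with the `sc0`/`sc` lists accumulated in the loop. A hedged sketch of that reporting pattern; the per-entry numbers here are placeholders, since the notebook's actual scoring call is not shown in this diff:

import numpy as np

sc0, sc = [], []
for s0, s in [(2.42, 2.04), (1.94, 2.08), (2.82, 2.54)]:  # stand-in per-entry scores
    sc0.append(s0)
    sc.append(s)
    print(s0, s)

print('Averages')
print(np.mean(sc0), np.mean(sc))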