Denys Rozumnyi committed on
Commit ca70147 · 1 Parent(s): 5ee3f67
Files changed (2)
  1. geom_solver.py +59 -42
  2. testing.ipynb +44 -24
geom_solver.py CHANGED
@@ -8,6 +8,7 @@ import itertools
import torch
from pytorch3d.renderer import PerspectiveCameras
from hoho.color_mappings import gestalt_color_mapping
+ from PIL import Image

def my_empty_solution():
    return np.zeros((18,3)), [(0, 0)]
@@ -56,7 +57,8 @@ class GeomSolver(object):

            in_this_image = np.array([cki in p.image_ids for p in self.points3D.values()])
            uv = torch.round(self.pyt_cameras[ki].transform_points(self.verts)[:, :2]).cpu().numpy().astype(int)
-             uv_inl = (uv[:, 0] >= 0) * (uv[:, 1] >= 0) * (uv[:, 0] < self.width) * (uv[:, 1] < self.height) * in_this_image
+             height, width = dist.shape
+             uv_inl = (uv[:, 0] >= 0) * (uv[:, 1] >= 0) * (uv[:, 0] < width) * (uv[:, 1] < height) * in_this_image
            proj_uv.append((uv, uv_inl))
            uv = uv[uv_inl]

@@ -113,32 +115,45 @@ class GeomSolver(object):
    def process_vertices(self):
        human_entry = self.human_entry

-         col_cams = [hoho.Rt_to_eye_target(human_entry['ade20k'][0], to_K(*human_entry['cameras'][1].params), quaternion_to_rotation_matrix(colmap_img.qvec), colmap_img.tvec) for colmap_img in human_entry['images'].values()]
-         eye, target, up, fov = col_cams[0]
+         col_cams = [hoho.Rt_to_eye_target(Image.new('RGB', (human_entry['cameras'][colmap_img.camera_id].width, human_entry['cameras'][colmap_img.camera_id].height)), to_K(*human_entry['cameras'][colmap_img.camera_id].params), quaternion_to_rotation_matrix(colmap_img.qvec), colmap_img.tvec) for colmap_img in human_entry['images'].values()]
+         # eye, target, up, fov = col_cams[0]

        cameras, images, self.points3D = human_entry['cameras'], human_entry['images'], human_entry['points3d']
        colmap_cameras_tf = list(human_entry['images'].keys())
        self.xyz = np.stack([p.xyz for p in self.points3D.values()])
        color = np.stack([p.rgb for p in self.points3D.values()])
        self.gests = [np.array(gest0) for gest0 in human_entry['gestalt']]
-         for ki in range(1, len(self.gests)):
-             if self.gests[ki].shape != self.gests[0].shape:
-                 self.gests[ki] = self.gests[ki].transpose(1,0,2)
+         # for ki in range(1, len(self.gests)):
+         #     if self.gests[ki].shape != self.gests[0].shape:
+         #         self.gests[ki] = self.gests[ki].transpose(1,0,2)
+
+         to_camera_ids = np.array([colmap_img.camera_id for colmap_img in human_entry['images'].values()])

        gestalt_camcet = np.stack([eye for eye, target, up, fov in itertools.starmap(hoho.Rt_to_eye_target, zip(*[human_entry[k] for k in 'ade20k K R t'.split()]))])
        col_camcet = np.stack([eye for eye, target, up, fov in col_cams])
        self.gestalt_to_colmap_cams = [colmap_cameras_tf[np.argmin(((gcam - col_camcet)**2).sum(1)**0.5)] for gcam in gestalt_camcet]
        self.broken_cams = np.array([np.min(((gcam - col_camcet)**2).sum(1)**0.5) for gcam in gestalt_camcet]) > 300

-         self.height, self.width = cameras[1].height, cameras[1].width
        N = len(self.gestalt_to_colmap_cams)
-         K = to_K(*human_entry['cameras'][1].params)[None].repeat(N, 0)
        R = np.stack([quaternion_to_rotation_matrix(human_entry['images'][self.gestalt_to_colmap_cams[ind]].qvec) for ind in range(N)])
        T = np.stack([human_entry['images'][self.gestalt_to_colmap_cams[ind]].tvec for ind in range(N)])

        R = np.linalg.inv(R)
-         image_size = torch.Tensor([self.height, self.width]).repeat(N, 1)
+         image_size = []
+         K = []
+         for ind in range(N):
+             cid = to_camera_ids[np.array(colmap_cameras_tf) == self.gestalt_to_colmap_cams[ind]][0]
+             sz = np.array([cameras[cid].height, cameras[cid].width])
+             image_size.append(sz)
+             K.append(to_K(*human_entry['cameras'][cid].params))
+         image_size = np.stack(image_size)
+         K = np.stack(K)
+         # K = to_K(*human_entry['cameras'][1].params)[None].repeat(N, 0)
+         # self.height, self.width = cameras[1].height, cameras[1].width
+         # image_size = torch.Tensor([self.height, self.width]).repeat(N, 1)
        self.pyt_cameras = PerspectiveCameras(device=self.device, R=R, T=T, in_ndc=False, focal_length=K[:, 0, :1], principal_point=K[:, :2, 2], image_size=image_size)
+
+
        self.verts = torch.from_numpy(self.xyz.astype(np.float32)).to(self.device)

        centers_apex, assigned_apex = self.cluster_points(['apex'])
@@ -211,9 +226,10 @@ class GeomSolver(object):
                dist = cv2.distanceTransform(1-edge_mask, cv2.DIST_L2, 3)
                per_type_dists[etype] = dist
            edge_dists.append(per_type_dists)
-
+             height, width, _ = gest.shape
+
            uv = torch.round(self.pyt_cameras[ki].transform_points(pyt_centers)[:, :2]).cpu().numpy().astype(int)
-             uv_inl = (uv[:, 0] >= 0) * (uv[:, 1] >= 0) * (uv[:, 0] < self.width) * (uv[:, 1] < self.height)
+             uv_inl = (uv[:, 0] >= 0) * (uv[:, 1] >= 0) * (uv[:, 0] < width) * (uv[:, 1] < height)
            uv = uv[uv_inl]
            uvs.append(uv)

@@ -221,37 +237,38 @@ class GeomSolver(object):
        thresholds_min_mean = {0 : [5, 7], 1 : [9, 25], 2: [30, 1000]}
        # thresholds_min_mean = {0 : [1, 7], 1 : [1, 25], 2: [1, 1000]}
        for i in range(pyt_centers.shape[0]):
-             for j in range(i+1, pyt_centers.shape[0]):
-                 etype = (self.is_apex[i] + self.is_apex[j])
-
-                 points_inter = pyt_centers[i][None] + torch.linspace(0, 1, 20)[:, None].to(self.device) * (pyt_centers[j][None] - pyt_centers[i][None])
-                 min_mean_dist = 1000
-                 all_dists = []
-                 best_ki = -1
-                 best_uvi = -1
-                 for ki in range(N):
-                     cki = self.gestalt_to_colmap_cams[ki]
-
-                     if not ( (cki in center_visibility[i]) or (cki in center_visibility[j]) ):
-                         continue
-                     if self.broken_cams[ki]:
-                         continue
-
-                     uvi = torch.round(self.pyt_cameras[ki].transform_points(points_inter)[:, :2]).cpu().numpy().astype(int)
-                     if (uvi <= 0).any() or (uvi[:,0] >= self.width).any() or (uvi[:,1] >= self.height).any():
-                         continue
-                     mean_dist = edge_dists[ki][etype][uvi[:,1], uvi[:,0]].mean()
-                     all_dists.append(mean_dist)
-                     if mean_dist < min_mean_dist:
-                         min_mean_dist = mean_dist
-                         best_ki = ki
-                         best_uvi = uvi
-
-                 if best_ki == -1:
-                     continue
-                 ths = thresholds_min_mean[etype]
-                 if min_mean_dist < ths[0] and np.mean(all_dists) < ths[1]:
-                     edges.append((i,j))
+             for j in range(i+1, pyt_centers.shape[0]):
+                 etype = (self.is_apex[i] + self.is_apex[j])
+
+                 points_inter = pyt_centers[i][None] + torch.linspace(0, 1, 20)[:, None].to(self.device) * (pyt_centers[j][None] - pyt_centers[i][None])
+                 min_mean_dist = 1000
+                 all_dists = []
+                 best_ki = -1
+                 best_uvi = -1
+                 for ki in range(N):
+                     cki = self.gestalt_to_colmap_cams[ki]
+
+                     if not ( (cki in center_visibility[i]) or (cki in center_visibility[j]) ):
+                         continue
+                     if self.broken_cams[ki]:
+                         continue
+
+                     height, width, _ = self.gests[ki].shape
+                     uvi = torch.round(self.pyt_cameras[ki].transform_points(points_inter)[:, :2]).cpu().numpy().astype(int)
+                     if (uvi <= 0).any() or (uvi[:,0] >= width).any() or (uvi[:,1] >= height).any():
+                         continue
+                     mean_dist = edge_dists[ki][etype][uvi[:,1], uvi[:,0]].mean()
+                     all_dists.append(mean_dist)
+                     if mean_dist < min_mean_dist:
+                         min_mean_dist = mean_dist
+                         best_ki = ki
+                         best_uvi = uvi
+
+                 if best_ki == -1:
+                     continue
+                 ths = thresholds_min_mean[etype]
+                 if min_mean_dist < ths[0] and np.mean(all_dists) < ths[1]:
+                     edges.append((i,j))
        if len(edges) == 0:
            edges.append((0, 0))
        return edges
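
Note on the geom_solver.py change above: the solver no longer assumes one shared COLMAP camera (id 1) with a single global width/height. It now records each image's camera_id, builds per-view intrinsics K and per-view (height, width), and passes those to PyTorch3D, while in-bounds checks use each view's own image shape. The snippet below is a minimal, self-contained sketch of that pattern, not the repository's code: the Cam record, to_K helper, view_cam_ids and all values are illustrative stand-ins.

# Sketch only: per-view intrinsics and image sizes for PyTorch3D PerspectiveCameras,
# mirroring the pattern introduced in this commit. Cam, to_K and view_cam_ids are
# hypothetical stand-ins, not identifiers from geom_solver.py.
from dataclasses import dataclass

import numpy as np
import torch
from pytorch3d.renderer import PerspectiveCameras


@dataclass
class Cam:
    # stand-in for a COLMAP PINHOLE camera record
    width: int
    height: int
    params: tuple  # (fx, fy, cx, cy)


def to_K(fx, fy, cx, cy):
    # 3x3 intrinsic matrix from PINHOLE parameters
    return np.array([[fx, 0., cx], [0., fy, cy], [0., 0., 1.]], dtype=np.float32)


cams = {1: Cam(1920, 1080, (1000., 1000., 960., 540.)),
        2: Cam(1024, 1024, (800., 800., 512., 512.))}
view_cam_ids = [1, 2, 2]      # camera id of each selected view
N = len(view_cam_ids)

# Per-view K and (height, width) instead of one global pair.
K = np.stack([to_K(*cams[c].params) for c in view_cam_ids])
image_size = np.stack([(cams[c].height, cams[c].width) for c in view_cam_ids])

# Placeholder extrinsics; the solver fills these from the COLMAP poses
# matched to each gestalt view.
R = np.stack([np.eye(3, dtype=np.float32)] * N)
T = np.zeros((N, 3), dtype=np.float32)

# Same call shape as in the diff: screen-space cameras with per-view
# focal length (fx), principal point and image size.
pyt_cameras = PerspectiveCameras(R=R, T=T, in_ndc=False,
                                 focal_length=K[:, 0, :1],
                                 principal_point=K[:, :2, 2],
                                 image_size=image_size)

# In-bounds mask for projected points against a view's own image size,
# mirroring the per-image height/width checks added above.
pts = torch.rand(5, 3)
pts[:, 2] += 5.0              # keep the points in front of the camera
uv = pyt_cameras[0].transform_points(pts)[:, :2].round().long()
h, w = image_size[0]
uv_inl = (uv[:, 0] >= 0) & (uv[:, 1] >= 0) & (uv[:, 0] < w) & (uv[:, 1] < h)

With identity extrinsics this only illustrates the shapes involved; in the solver, R and T come from the COLMAP images matched to each gestalt view.
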
testing.ipynb CHANGED
@@ -176,7 +176,7 @@
},
{
"cell_type": "code",
- "execution_count": 127,
+ "execution_count": 195,
"id": "88f4fc8f-efa9-404b-9073-c7d4a73f9075",
"metadata": {},
"outputs": [
@@ -184,21 +184,38 @@
"name": "stdout",
"output_type": "stream",
"text": [
- "2.391964449292722 1.77839901463355\n"
- ]
- },
- {
- "ename": "KeyboardInterrupt",
- "evalue": "",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[127], line 20\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m ki, entry \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(dataset):\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m# if ki < 153: wrong camera ids\u001b[39;00m\n\u001b[1;32m 16\u001b[0m \u001b[38;5;66;03m# continue\u001b[39;00m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;66;03m# if ki < 162: different cameras and different image sizes\u001b[39;00m\n\u001b[1;32m 18\u001b[0m \u001b[38;5;66;03m# continue\u001b[39;00m\n\u001b[1;32m 19\u001b[0m solver \u001b[38;5;241m=\u001b[39m GeomSolver()\n\u001b[0;32m---> 20\u001b[0m vertices, edges \u001b[38;5;241m=\u001b[39m \u001b[43msolver\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msolve\u001b[49m\u001b[43m(\u001b[49m\u001b[43mentry\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 22\u001b[0m scores0 \u001b[38;5;241m=\u001b[39m (compute_WED(np\u001b[38;5;241m.\u001b[39mzeros((entry[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mwf_vertices\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m],\u001b[38;5;241m3\u001b[39m)),\n\u001b[1;32m 23\u001b[0m [(\u001b[38;5;241m0\u001b[39m, \u001b[38;5;241m0\u001b[39m)],\n\u001b[1;32m 24\u001b[0m np\u001b[38;5;241m.\u001b[39marray(entry[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mwf_vertices\u001b[39m\u001b[38;5;124m'\u001b[39m]),\n\u001b[1;32m 25\u001b[0m np\u001b[38;5;241m.\u001b[39marray(entry[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mwf_edges\u001b[39m\u001b[38;5;124m'\u001b[39m])))\n\u001b[1;32m 27\u001b[0m scores \u001b[38;5;241m=\u001b[39m (compute_WED(vertices,\n\u001b[1;32m 28\u001b[0m edges,\n\u001b[1;32m 29\u001b[0m np\u001b[38;5;241m.\u001b[39marray(entry[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mwf_vertices\u001b[39m\u001b[38;5;124m'\u001b[39m]),\n\u001b[1;32m 30\u001b[0m np\u001b[38;5;241m.\u001b[39marray(entry[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mwf_edges\u001b[39m\u001b[38;5;124m'\u001b[39m])))\n",
- "File \u001b[0;32m~/src/DRAK/geom_solver.py:263\u001b[0m, in \u001b[0;36mGeomSolver.solve\u001b[0;34m(self, entry, visualize)\u001b[0m\n\u001b[1;32m 261\u001b[0m human_entry \u001b[38;5;241m=\u001b[39m convert_entry_to_human_readable(entry)\n\u001b[1;32m 262\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhuman_entry \u001b[38;5;241m=\u001b[39m human_entry\n\u001b[0;32m--> 263\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprocess_vertices\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 264\u001b[0m vertices \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvertices_aug\n\u001b[1;32m 265\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreturn_edges:\n",
- "File \u001b[0;32m~/src/DRAK/geom_solver.py:145\u001b[0m, in \u001b[0;36mGeomSolver.process_vertices\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 142\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverts \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mfrom_numpy(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mxyz\u001b[38;5;241m.\u001b[39mastype(np\u001b[38;5;241m.\u001b[39mfloat32))\u001b[38;5;241m.\u001b[39mto(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdevice)\n\u001b[1;32m 144\u001b[0m centers_apex, assigned_apex \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcluster_points([\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mapex\u001b[39m\u001b[38;5;124m'\u001b[39m])\n\u001b[0;32m--> 145\u001b[0m centers_eave, assigned_eave \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcluster_points\u001b[49m\u001b[43m(\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43meave_end_point\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 146\u001b[0m centers \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mconcatenate((centers_apex, centers_eave))\n\u001b[1;32m 147\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39massigned_points \u001b[38;5;241m=\u001b[39m assigned_apex \u001b[38;5;241m+\u001b[39m assigned_eave\n",
- "File \u001b[0;32m~/src/DRAK/geom_solver.py:83\u001b[0m, in \u001b[0;36mGeomSolver.cluster_points\u001b[0;34m(self, point_types)\u001b[0m\n\u001b[1;32m 81\u001b[0m \t\u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mxyz[selected_points][dense_pnts], [point_inds[selected_points][dense_pnts]]\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tempi \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;241m30\u001b[39m):\n\u001b[0;32m---> 83\u001b[0m retval, temp_bestLabels, temp_centers \u001b[38;5;241m=\u001b[39m \u001b[43mcv2\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mkmeans\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mxyz\u001b[49m\u001b[43m[\u001b[49m\u001b[43mselected_points\u001b[49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[43mdense_pnts\u001b[49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloat32\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtempi\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcriteria\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m200\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43mflags\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 84\u001b[0m cpnts \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mfrom_numpy(temp_centers\u001b[38;5;241m.\u001b[39mastype(np\u001b[38;5;241m.\u001b[39mfloat32))[\u001b[38;5;28;01mNone\u001b[39;00m]\n\u001b[1;32m 85\u001b[0m bdists, inds, nn \u001b[38;5;241m=\u001b[39m ball_query(cpnts, cpnts, K\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m2\u001b[39m, radius\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1.2\u001b[39m\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mkmeans_th) \n",
- "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
+ "2.4183324229440974 2.038011551581092\n",
+ "1.9395643500589714 2.0791329825966307\n",
+ "2.815541573372287 2.5366888251094535\n",
+ "2.3254810143936755 1.648047653136293\n",
+ "2.3286533191380765 1.9072141098956248\n",
+ "2.342868026940067 1.9669375395419237\n",
+ "2.04873204164575 1.8261703137595002\n",
+ "2.167980973024546 1.5160824709801253\n",
+ "2.571328055702413 2.020824999345514\n",
+ "2.0127597511603774 2.0591274017651693\n",
+ "1.961080548873729 1.8200081675273143\n",
+ "2.1617889927813057 1.6349180819215263\n",
+ "2.0817899095268078 1.5980454240474442\n",
+ "2.484359575186481 1.9029737257539197\n",
+ "2.2997637652539 1.794924573842486\n",
+ "2.440276048704617 2.5473207175534065\n",
+ "2.135748984421359 1.6773471477819646\n",
+ "2.3955711940075584 2.5549005605155117\n",
+ "2.516982526006783 2.2057496708814113\n",
+ "2.521576698581939 1.615795718071218\n",
+ "1.8495344082304994 1.57835603284867\n",
+ "2.932654420428436 1.8880643234492105\n",
+ "1.9387187366052139 1.6981983307675732\n",
+ "2.135296338517323 1.8054484663488366\n",
+ "2.527322316920579 1.7985373132549314\n",
+ "2.681606928809334 2.2261780476238493\n",
+ "2.4613439446210306 1.6635009702924486\n",
+ "2.3497006297365304 1.815488520909667\n",
+ "2.2946897561795074 1.6075314409425536\n",
+ "2.2292662656029454 2.059612285543195\n",
+ "Averages\n",
+ "2.312343783912538 1.9030379122529486\n"
]
}
],
@@ -209,7 +226,7 @@
"torch.manual_seed(0)\n",
"# One shard of the dataset\n",
"dataset = wds.WebDataset(hf_hub_download(repo_id='usm3d/hoho-train-set',\n",
- " filename='data/train/hoho_v3_000-of-032.tar.gz',\n",
+ " filename='data/train/hoho_v3_001-of-032.tar.gz',\n",
" repo_type=\"dataset\"))\n",
"\n",
"dataset = dataset.decode()\n",
@@ -217,10 +234,10 @@
"sc0 = []\n",
"sc = []\n",
"for ki, entry in enumerate(dataset):\n",
- " # if ki < 153: wrong camera ids\n",
- " # continue\n",
- " # if ki < 162: different cameras and different image sizes\n",
- " # continue\n",
+ " # if ki < 153: # wrong camera ids\n",
+ " # continue\n",
+ " # if ki < 162: # different cameras and different image sizes\n",
+ " # continue\n",
" solver = GeomSolver()\n",
" vertices, edges = solver.solve(entry)\n",
" \n",
@@ -258,22 +275,25 @@
},
{
"cell_type": "code",
- "execution_count": 123,
+ "execution_count": 185,
"id": "a34d0a2b-9986-47cc-8a3f-c15397370c4d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "162"
+ "0"
]
},
- "execution_count": 123,
+ "execution_count": 185,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
+ "# human_entry['images'][1]\n",
+ "# # human_entry['cameras'][1].width\n",
+ "# Image.new('RGB', (human_entry['cameras'][1].width, human_entry['cameras'][1].height)).height\n",
"ki"
]
},
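
For reference, the stdout pairs in the updated testing.ipynb cell come from looping over one shard, solving each entry, and computing the wireframe edit distance (WED) of the prediction and of an empty baseline against the ground-truth wireframe. The sketch below only reproduces the shape of that loop, assuming GeomSolver, compute_WED, numpy as np, and the decoded dataset are in scope as in the notebook.

# Sketch of the evaluation loop behind the printed score pairs (assumes the
# notebook's own imports and the decoded WebDataset `dataset`).
sc0, sc = [], []                      # baseline WEDs, solver WEDs
for ki, entry in enumerate(dataset):
    solver = GeomSolver()
    vertices, edges = solver.solve(entry)

    gt_vertices = np.array(entry['wf_vertices'])
    gt_edges = np.array(entry['wf_edges'])

    # Empty-wireframe baseline vs. the solver's prediction.
    score0 = compute_WED(np.zeros((gt_vertices.shape[0], 3)), [(0, 0)], gt_vertices, gt_edges)
    score = compute_WED(vertices, edges, gt_vertices, gt_edges)
    sc0.append(score0)
    sc.append(score)
    print(score0, score)              # two scores per entry, as in the cell output

print('Averages')
print(np.mean(sc0), np.mean(sc))      # the final 'Averages' line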