from datetime import datetime

import numpy as np
import torch
from PIL import Image
from torchvision import transforms

from .ldm.util import load_and_preprocess, instantiate_from_config
from .pose_funcs import probe_pose, find_optimal_poses, get_inv_pose, add_pose, pairwise_loss
from .oee.utils.elev_est_api import elev_est_api, ElevEstHelper
from .sampling import sample_images


def load_image(img_path, mask_path=None, preprocessor=None, threshold=0.9):
    """Load an image from disk and put it on a white background.

    Args:
        img_path: Path of the image to open with PIL.
        mask_path: Optional path of a mask image. Only used for RGB inputs
            when no ``preprocessor`` is given; the image is composited over
            a white background through this mask.
        preprocessor: Optional object forwarded to ``load_and_preprocess``.
            When given, that helper's result is returned as-is and
            ``mask_path`` / ``threshold`` are ignored.
        threshold: For RGBA inputs, pixels whose alpha (scaled to [0, 1]) is
            less than or equal to this value are replaced by white.

    Returns:
        For RGBA / RGB inputs without a preprocessor: a float32 array with
        three channels and values scaled by 1/255. For any other mode a
        message is printed and the PIL image is returned unchanged.
    """
    img = Image.open(img_path)

    if preprocessor is not None:
        return load_and_preprocess(preprocessor, img)

    if img.mode == 'RGBA':
        img = np.asarray(img, dtype=np.float32) / 255.
        # Thresholding background: low-alpha pixels become opaque white.
        img[img[:, :, -1] <= threshold] = [1., 1., 1., 1.]
        img = img[:, :, :3]
    elif img.mode == 'RGB':
        if mask_path is not None:
            bkg = Image.new('RGB', (img.width, img.height), color=(255, 255, 255))
            # The composite is a new image, so the mask file can be closed
            # as soon as it has been produced.
            with Image.open(mask_path) as mask:
                img = Image.composite(img, bkg, mask)
        img = np.asarray(img, dtype=np.float32) / 255.
    else:
        # Best-effort: unsupported modes are reported, not raised.
        print('Wrong format:', img_path)

    return img


def load_model_from_config(config, ckpt, device, verbose=False):
    """Instantiate ``config.model`` and load weights from a checkpoint.

    Args:
        config: Object whose ``model`` attribute is passed to
            ``instantiate_from_config``.
        ckpt: Checkpoint path given to ``torch.load``. The loaded object must
            contain a ``'state_dict'`` entry; ``'global_step'`` is printed
            when present.
        device: Used both as ``map_location`` and as the target of
            ``model.to``.
        verbose: When true, missing / unexpected state-dict keys are printed.

    Returns:
        The model, moved to ``device`` and switched to eval mode.
    """
    print(f'Loading model from {ckpt}')
    # NOTE(security): torch.load unpickles the file; only load checkpoints
    # from trusted sources.
    pl_sd = torch.load(ckpt, map_location=device)
    if 'global_step' in pl_sd:
        step = pl_sd['global_step']
        print(f'Global Step: {step}')
    sd = pl_sd['state_dict']

    model = instantiate_from_config(config.model)
    # strict=False: mismatching keys are tolerated and only reported.
    m, u = model.load_state_dict(sd, strict=False)
    if len(m) > 0 and verbose:
        print('missing keys:')
        print(m)
    if len(u) > 0 and verbose:
        print('unexpected keys:')
        print(u)

    model.to(device)
    model.eval()
    return model


def estimate_elevs(model, images, est_type=None, matcher_ckpt_path=None):
    """Estimate per-image elevations and the elevation offsets to probe.

    Args:
        model: Model used to sample surrounding views; ``model.device`` is
            passed to the feature matcher factory.
        images: Sequence of input images; index 0 is the anchor.
        est_type: ``'all'`` estimates every image, ``'simple'`` estimates
            only image 0. Any other value skips estimation.
        matcher_ckpt_path: Checkpoint path for the feature matcher.

    Returns:
        ``(elevs, elev_ranges)``: two dicts keyed by image index.
        ``elevs[i]`` is the estimated elevation in radians, or ``None``.
        ``elev_ranges[i]`` is an array of elevation offsets relative to
        image 0, or ``None`` when nothing is known. ``elev_ranges[0]`` is
        always ``None``.
    """
    num = len(images)
    elevs = {i: None for i in range(num)}
    elev_ranges = {i: None for i in range(num)}

    if est_type == 'all':
        matcher = ElevEstHelper.get_feature_matcher(matcher_ckpt_path, model.device)
        for i in range(num):
            simgs = sample_surrounding_images(model, images[i])
            elev = elev_est_api(matcher, simgs, min_elev=20, max_elev=160)
            # The estimator works in degrees and may return None.
            elevs[i] = np.deg2rad(elev) if elev is not None else None

        for i in range(1, num):
            if elevs[i] is not None and elevs[0] is not None:
                # Both known: a single exact offset.
                elev_ranges[i] = np.array([elevs[i] - elevs[0]])
            elif elevs[i] is not None:
                # Only the target is known: probe around it, sign flipped
                # because the offsets are expressed from image 0's side.
                elev_ranges[i] = -make_elev_probe_range(elevs[i])
            elif elevs[0] is not None:
                # Only the anchor is known: probe around it.
                elev_ranges[i] = make_elev_probe_range(elevs[0])

    elif est_type == 'simple':
        matcher = ElevEstHelper.get_feature_matcher(matcher_ckpt_path, model.device)
        simgs = sample_surrounding_images(model, images[0])
        elev = elev_est_api(matcher, simgs, min_elev=20, max_elev=160)
        elevs[0] = np.deg2rad(elev) if elev is not None else None

        # Fall back to pi/2 for the anchor when estimation failed, which
        # yields a zero offset below.
        ae = elevs[0] if elevs[0] is not None else np.pi/2
        for i in range(1, num):
            elev_ranges[i] = np.array([np.pi/2 - ae])

    return elevs, elev_ranges


def estimate_poses(
        model, images,
        seed_cand_num=8,
        init_type='pairwise',
        optm_type='pairwise',
        probe_ts_range=None, ts_range=None,
        probe_bsz=16,
        adjust_factor=10.,
        adjust_iters=10,
        adjust_bsz=1,
        refine_factor=1.,
        refine_iters=600,
        refine_bsz=1,
        noise=None,
        elevs=None,
        elev_ranges=None
    ):
    """Estimate the poses of ``images[1:]`` relative to ``images[0]``.

    The procedure has three stages: probing candidate poses for every pair
    (0, i), selecting one initial pose per image, then jointly refining all
    poses with ``find_optimal_poses``.

    Args:
        model: Model forwarded to the pose helpers.
        images: Sequence of tensors; each one is permuted with
            ``permute(0, 2, 3, 1)`` before use (presumably NCHW -> NHWC;
            confirm against the caller).
        seed_cand_num: Number of evenly spaced azimuth seeds in [0, 2*pi).
        init_type: ``'pairwise'`` ranks candidates by their pair loss;
            ``'triangular'`` ranks them by accumulated losses over image
            triplets (0, i, j). Forced to ``'pairwise'`` when there are at
            most two images. Any other value selects the first candidate.
        optm_type: ``'pairwise'`` refines over pairs (0, i) and (i, 0);
            ``'triangular'`` refines over all ordered pairs.
        probe_ts_range: Range forwarded to ``probe_pose`` and
            ``pairwise_loss``. ``None`` means ``[0.02, 0.98]``.
        ts_range: Range forwarded to ``find_optimal_poses``. ``None`` means
            ``[0.02, 0.98]``.
        probe_bsz: Batch size forwarded to ``probe_pose`` / ``pairwise_loss``.
        adjust_factor, adjust_iters, adjust_bsz: Settings of the short
            per-candidate adjustment; ``adjust_iters <= 0`` skips it.
        refine_factor, refine_iters, refine_bsz: Settings of the final
            refinement, which runs ``(num - 1) * refine_iters`` iterations.
        noise: Forwarded to ``probe_pose`` / ``pairwise_loss``.
        elevs: Optional dict of per-image elevations, only echoed back in the
            returned auxiliary data.
        elev_ranges: Optional dict; ``elev_ranges[i]`` is passed to
            ``probe_pose`` as ``theta_range``.

    Returns:
        ``(out_poses, aux_data)``, where ``out_poses`` is the first value
        returned by ``find_optimal_poses`` and ``aux_data`` holds the
        selected initial poses (``'tri_init_sph'``), the best pairwise
        initial poses (``'pw_init_sph'``) and ``elevs`` (``'elev'``).

    Raises:
        ValueError: If ``optm_type`` is not ``'pairwise'`` or
            ``'triangular'``.
    """
    # Fail fast: an unknown optm_type previously surfaced only after the
    # expensive probing stage, as a TypeError from len(None).
    if optm_type not in ('pairwise', 'triangular'):
        raise ValueError(
            f"optm_type must be 'pairwise' or 'triangular', got {optm_type!r}"
        )

    # Fresh lists per call, so helpers can never mutate a shared default.
    if probe_ts_range is None:
        probe_ts_range = [0.02, 0.98]
    if ts_range is None:
        ts_range = [0.02, 0.98]

    num = len(images)

    if elevs is None:
        elevs = {i: None for i in range(num)}
    if elev_ranges is None:
        elev_ranges = {i: None for i in range(num)}

    # Triangular selection needs at least three images.
    if num <= 2:
        init_type = 'pairwise'

    cands = {}
    losses = {}

    init_poses = {i: None for i in range(num)}
    pairwise_init_poses = {i: None for i in range(num)}

    print('Initialization: Probe', datetime.now())

    images = [img.permute(0, 2, 3, 1) for img in images]

    # Candidates are (loss, pose, ...) records; order by the loss only so a
    # tie never falls through to comparing pose payloads, which may not be
    # orderable.
    def by_loss(cand):
        return cand[0]

    for i in range(1, num):

        print('PAIR', 0, i, datetime.now())

        azimuth_range = np.arange(start=0.0, stop=np.pi*2, step=np.pi*2 / seed_cand_num)

        all_cands = probe_pose(
            model, images[0], images[i], probe_ts_range, probe_bsz,
            theta_range=elev_ranges[i], azimuth_range=azimuth_range, noise=noise
        )
        all_cands = sorted(all_cands, key=by_loss)

        print('Exploration', len(all_cands), datetime.now())

        adjusted_cands = all_cands[:5]

        if adjust_iters > 0:
            adjusted_cands = []

            # Only adjust the better half of the probed candidates.
            for cand in all_cands[:len(all_cands)//2]:

                out_poses, _, _ = find_optimal_poses(
                    model, [images[0], images[i]],
                    adjust_factor,
                    bsz=adjust_bsz,
                    n_iter=adjust_iters,
                    init_poses={1: cand[1]},
                    ts_range=ts_range,
                    print_n=100,
                    avg_last_n=1
                )

                loss = pairwise_loss(
                    out_poses[0], model, images[0], images[i],
                    probe_ts_range, probe_bsz, noise=noise
                )

                # Keep the pre-adjustment loss and pose for the log output.
                adjusted_cands.append((loss, out_poses[0], cand[0], cand[1]))

            adjusted_cands = sorted(adjusted_cands, key=by_loss)[:5]

        for cand in adjusted_cands:
            print(cand)

        cands[i] = [cand[:2] for cand in adjusted_cands]
        # Triangular selection starts from zero and accumulates triplet
        # losses below; pairwise selection uses the pair loss directly.
        losses[i] = [loss if (init_type == 'pairwise') else 0.0 for loss, _ in cands[i]]

        pairwise_init_poses[i] = min(cands[i], key=by_loss)[1]

    print('Selection', datetime.now())

    if init_type == 'triangular':

        for i in range(1, num):
            for j in range(i+1, num):

                n_i = len(cands[i])
                n_j = len(cands[j])

                # iloss[u][v] and jloss[v][u] hold the same triplet loss,
                # indexed from the point of view of image i and image j.
                iloss = [[None for v in range(n_j)] for u in range(n_i)]
                jloss = [[None for u in range(n_i)] for v in range(n_j)]

                for u in range(n_i):

                    la, pa = cands[i][u]
                    # pose i -> 0
                    pa = get_inv_pose(pa)

                    for v in range(n_j):

                        # pose 0 -> j
                        lb, pb = cands[j][v]
                        theta, azimuth, radius = add_pose(pa, pb)
                        lp = pairwise_loss(
                            [theta, azimuth, radius], model, images[i], images[j],
                            probe_ts_range, probe_bsz, noise=noise
                        )

                        iloss[u][v] = la + lb + lp
                        jloss[v][u] = la + lb + lp

                # Each candidate gets its best triplet loss, capped at three
                # times its own pair loss.
                for u in range(n_i):
                    losses[i][u] += min(min(iloss[u]), cands[i][u][0]*3)

                for v in range(n_j):
                    losses[j][v] += min(min(jloss[v]), cands[j][v][0]*3)

    for i in range(1, num):

        # Index of the lowest accumulated loss (first one wins on ties).
        min_rank = min(range(len(losses[i])), key=lambda x, i=i: losses[i][x])

        for u in range(len(cands[i])):
            print(cands[i][u], losses[i][u])
        print(i, 'SELECT', min_rank, losses[i][min_rank])

        init_poses[i] = cands[i][min_rank][1]

    print('Refinement', datetime.now())

    if optm_type == 'pairwise':
        combinations = [(0, i) for i in range(1, num)] + [(i, 0) for i in range(1, num)]
    else:  # 'triangular' (validated at the top)
        combinations = []
        for i in range(0, num):
            for j in range(i+1, num):
                combinations.append((i, j))
                combinations.append((j, i))

    print('Combinations', len(combinations), combinations)

    # Refinement
    out_poses, _, _ = find_optimal_poses(
        model, images,
        refine_factor,
        bsz=refine_bsz,
        n_iter=(num-1)*refine_iters,
        init_poses=init_poses,
        ts_range=ts_range,
        combinations=combinations,
        avg_last_n=20,
        print_n=100
    )

    print('Done', datetime.now())

    aux_data = {
        'tri_init_sph': init_poses,
        'pw_init_sph': pairwise_init_poses,
        'elev': elevs
    }

    return out_poses, aux_data


def make_elev_probe_range(elev, interval=np.pi/4):
    """Return elevation offsets, relative to ``elev``, to probe.

    Absolute elevations are generated in steps of ``interval`` starting at
    ``elev``: downwards while greater than 0 and upwards while less than pi.
    ``elev`` is then subtracted, so the result starts with 0 (when ``elev``
    is positive) followed by the negative and then the positive offsets.

    Args:
        elev: Reference elevation in radians.
        interval: Step between probed elevations in radians.

    Returns:
        1-D numpy array of offsets.
    """
    up_range = np.arange(elev, 0, -interval)
    down_range = np.arange(elev+interval, np.pi, interval)
    probe_range = np.concatenate([up_range, down_range])
    probe_range -= elev
    return probe_range


def sample_surrounding_images(model, image):
    """Sample four views around ``image`` for elevation estimation.

    ``sample_images`` is called four times with one sample each, offsetting
    its first pose argument by -10 and +10 degrees and then its second pose
    argument by -10 and +10 degrees (presumably elevation and azimuth;
    confirm against ``sample_images``).

    Returns:
        The four results joined with ``+`` in the order listed above.
    """
    s0 = sample_images(model, image, float(np.deg2rad(-10)), 0, 0, n_samples=1)
    s1 = sample_images(model, image, float(np.deg2rad(+10)), 0, 0, n_samples=1)
    s2 = sample_images(model, image, 0, float(np.deg2rad(-10)), 0, n_samples=1)
    s3 = sample_images(model, image, 0, float(np.deg2rad(+10)), 0, n_samples=1)
    return s0 + s1 + s2 + s3