# Training configuration for the "fine_pretrain" stage (see `log_name` and
# `work_dir` below), written as plain module-level Python assignments.

# Names of the entries listed for collection.
# NOTE(review): presumably these are the batch keys forwarded to the model --
# confirm against the code that consumes `collect_input_args`.
collect_input_args = [
    'image_lr',
    'crops_image_hr',
    'depth_gt',
    'crop_depths',
    'bboxs',
    'image_hr',
]

# Global runtime switches.
# NOTE(review): the meaning of each flag is inferred from its name only
# (SyncBN conversion, debug mode, unused-parameter detection) -- confirm
# against the launcher/runner that reads them.
convert_syncbn = True
debug = False

# Process environment: cuDNN benchmark mode on, 'nccl' distributed backend,
# and 'forkserver' as the multiprocessing start method.
env_cfg = dict(
    cudnn_benchmark=True,
    dist_cfg=dict(backend='nccl'),
    mp_cfg=dict(mp_start_method='forkserver'),
)

find_unused_parameters = True

# Dataloader over an 'ImageDataset' with empty dataset name / image directory
# and no ground-truth directory.
# NOTE(review): the empty strings look like placeholders to be filled in by
# the caller -- confirm.
general_dataloader = dict(
    batch_size=1,
    dataset=dict(
        dataset_name='',
        gt_dir=None,
        rgb_image_dir='',
        type='ImageDataset',
    ),
    num_workers=2,
)

# Job launcher and the name used for this run's logs.
launcher = 'pytorch'
log_name = 'fine_pretrain'

# Depth range; the same two values are repeated inside `model`, the dataset
# sections and `zoe_depth_config` in this file.
# NOTE(review): units are not stated in this file -- confirm.
max_depth = 80
min_depth = 0.001

# Model definition of type 'BaselinePretrain' with target 'fine'.
# `coarse_branch` and `fine_branch` carry the same 51 'DA-ZoeDepth' settings;
# they are deliberately spelled out twice so each branch owns its own dict.
model = dict(
    coarse_branch=dict(
        attractor_alpha=1000,
        attractor_gamma=2,
        attractor_kind='mean',
        attractor_type='inv',
        aug=True,
        bin_centers_type='softplus',
        bin_embedding_dim=128,
        clip_grad=0.1,
        dataset='nyu',
        depth_anything=True,
        distributed=True,
        do_resize=False,
        force_keep_ar=True,
        freeze_midas_bn=True,
        gpu='NULL',
        img_size=[392, 518],
        inverse_midas=False,
        log_images_every=0.1,
        max_depth=80,
        max_temp=50.0,
        max_translation=100,
        memory_efficient=True,
        midas_model_type='vitl',
        min_depth=0.001,
        min_temp=0.0212,
        model='zoedepth',
        n_attractors=[16, 8, 4, 1],
        n_bins=64,
        name='ZoeDepth',
        notes='',
        output_distribution='logbinomial',
        prefetch=False,
        pretrained_resource='local::./work_dir/DepthAnything_vitl.pt',
        print_losses=False,
        project='ZoeDepth',
        random_crop=False,
        random_translate=False,
        root='.',
        save_dir='',
        shared_dict='NULL',
        tags='',
        train_midas=True,
        translate_prob=0.2,
        type='DA-ZoeDepth',
        uid='NULL',
        use_amp=False,
        use_pretrained_midas=True,
        use_shared_dict=False,
        validate_every=0.25,
        version_name='v1',
        workers=16,
    ),
    fine_branch=dict(
        attractor_alpha=1000,
        attractor_gamma=2,
        attractor_kind='mean',
        attractor_type='inv',
        aug=True,
        bin_centers_type='softplus',
        bin_embedding_dim=128,
        clip_grad=0.1,
        dataset='nyu',
        depth_anything=True,
        distributed=True,
        do_resize=False,
        force_keep_ar=True,
        freeze_midas_bn=True,
        gpu='NULL',
        img_size=[392, 518],
        inverse_midas=False,
        log_images_every=0.1,
        max_depth=80,
        max_temp=50.0,
        max_translation=100,
        memory_efficient=True,
        midas_model_type='vitl',
        min_depth=0.001,
        min_temp=0.0212,
        model='zoedepth',
        n_attractors=[16, 8, 4, 1],
        n_bins=64,
        name='ZoeDepth',
        notes='',
        output_distribution='logbinomial',
        prefetch=False,
        pretrained_resource='local::./work_dir/DepthAnything_vitl.pt',
        print_losses=False,
        project='ZoeDepth',
        random_crop=False,
        random_translate=False,
        root='.',
        save_dir='',
        shared_dict='NULL',
        tags='',
        train_midas=True,
        translate_prob=0.2,
        type='DA-ZoeDepth',
        uid='NULL',
        use_amp=False,
        use_pretrained_midas=True,
        use_shared_dict=False,
        validate_every=0.25,
        version_name='v1',
        workers=16,
    ),
    max_depth=80,
    min_depth=0.001,
    # Same (392, 518) pair as `img_size` in both branches, but a tuple here.
    patch_process_shape=(392, 518),
    sigloss=dict(type='SILogLoss'),
    target='fine',
    type='BaselinePretrain',
)

# Optimizer setup: AdamW (lr 4e-06, weight decay 0.01) with gradient clipping
# of type 'norm' (max_norm 0.1, norm_type 2). No per-parameter overrides are
# defined (`custom_keys` is empty).
optim_wrapper = dict(
    clip_grad=dict(max_norm=0.1, norm_type=2, type='norm'),
    optimizer=dict(lr=4e-06, type='AdamW', weight_decay=0.01),
    paramwise_cfg=dict(bypass_duplicate=True, custom_keys=dict()),
)

# Learning-rate / momentum schedule parameters.
# NOTE(review): the key names match the keyword arguments of
# torch.optim.lr_scheduler.OneCycleLR -- confirm that is the consumer.
param_scheduler = dict(
    base_momentum=0.85,
    cycle_momentum=True,
    div_factor=1,
    final_div_factor=10000,
    max_momentum=0.95,
    pct_start=0.5,
    three_phase=False,
)

# Run bookkeeping: project name, resume switch (off), and free-form tags.
# NOTE(review): presumably consumed by an experiment logger -- confirm.
project = 'patchfusion'
resume = False
tags = [
    'fine',
    'da',
    'vitl',
]

# Test dataloader over the split listed in ./data/u4k/splits/test.txt, in
# 'infer' mode.
# NOTE(review): `network_process_size` is [384, 512] here while the train/val
# dataloaders and the model in this file use [392, 518], and no `resize_mode`
# is set -- confirm this difference is intended.
test_in_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='infer',
        split='./data/u4k/splits/test.txt',
        transform_cfg=dict(network_process_size=[384, 512]),
        type='UnrealStereo4kDataset',
    ),
    num_workers=2,
)

# Test dataloader over the split listed in ./data/u4k/splits/test_out.txt, in
# 'infer' mode. Apart from the split file it matches `test_in_dataloader`.
# NOTE(review): `network_process_size` is [384, 512] here while the train/val
# dataloaders and the model in this file use [392, 518] -- confirm intended.
test_out_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='infer',
        split='./data/u4k/splits/test_out.txt',
        transform_cfg=dict(network_process_size=[384, 512]),
        type='UnrealStereo4kDataset',
    ),
    num_workers=2,
)

# Training-loop schedule: 24 epochs, validation type 'epoch_base' with
# interval 2, checkpoint interval 24, plus logging intervals.
# NOTE(review): whether each interval counts epochs or iterations is not
# visible in this file -- confirm against the trainer.
train_cfg = dict(
    eval_start=0,
    log_interval=100,
    max_epochs=24,
    save_checkpoint_interval=24,
    train_log_img_interval=500,
    val_interval=2,
    val_log_img_interval=50,
    val_type='epoch_base',
)

# Training dataloader over the split listed in ./data/u4k/splits/train.txt,
# batch size 4, with random cropping enabled and a (540, 960) crop size.
# `network_process_size` matches the model's [392, 518] `img_size`.
train_dataloader = dict(
    batch_size=4,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='train',
        resize_mode='depth-anything',
        split='./data/u4k/splits/train.txt',
        transform_cfg=dict(
            degree=1.0,
            network_process_size=[392, 518],
            random_crop=True,
            random_crop_size=(540, 960),
        ),
        type='UnrealStereo4kDataset',
    ),
    num_workers=4,
)

# Validation dataloader over the split listed in ./data/u4k/splits/val.txt,
# in 'infer' mode. Unlike `train_dataloader`, `transform_cfg` carries no
# `random_crop` key, although `random_crop_size` is still present.
val_dataloader = dict(
    batch_size=1,
    dataset=dict(
        data_root='./data/u4k',
        max_depth=80,
        min_depth=0.001,
        mode='infer',
        resize_mode='depth-anything',
        split='./data/u4k/splits/val.txt',
        transform_cfg=dict(
            degree=1.0,
            network_process_size=[392, 518],
            random_crop_size=(540, 960),
        ),
        type='UnrealStereo4kDataset',
    ),
    num_workers=2,
)

# Working directory for this run.
# NOTE(review): presumably where checkpoints and logs are written -- confirm.
work_dir = './work_dir/depthanything_vitl_u4k/fine_pretrain'

# Stand-alone copy of the 'DA-ZoeDepth' settings; it holds the same 51
# key/value pairs as `model['coarse_branch']` and `model['fine_branch']`.
zoe_depth_config = dict(
    attractor_alpha=1000,
    attractor_gamma=2,
    attractor_kind='mean',
    attractor_type='inv',
    aug=True,
    bin_centers_type='softplus',
    bin_embedding_dim=128,
    clip_grad=0.1,
    dataset='nyu',
    depth_anything=True,
    distributed=True,
    do_resize=False,
    force_keep_ar=True,
    freeze_midas_bn=True,
    gpu='NULL',
    img_size=[392, 518],
    inverse_midas=False,
    log_images_every=0.1,
    max_depth=80,
    max_temp=50.0,
    max_translation=100,
    memory_efficient=True,
    midas_model_type='vitl',
    min_depth=0.001,
    min_temp=0.0212,
    model='zoedepth',
    n_attractors=[16, 8, 4, 1],
    n_bins=64,
    name='ZoeDepth',
    notes='',
    output_distribution='logbinomial',
    prefetch=False,
    pretrained_resource='local::./work_dir/DepthAnything_vitl.pt',
    print_losses=False,
    project='ZoeDepth',
    random_crop=False,
    random_translate=False,
    root='.',
    save_dir='',
    shared_dict='NULL',
    tags='',
    train_midas=True,
    translate_prob=0.2,
    type='DA-ZoeDepth',
    uid='NULL',
    use_amp=False,
    use_pretrained_midas=True,
    use_shared_dict=False,
    validate_every=0.25,
    version_name='v1',
    workers=16,
)
|