# zhyever's picture
# Upload 40 files
# 935a102 verified
# Ordered list of input field names. Presumably these are the batch keys
# gathered and handed to the model -- verify against the consuming runner.
collect_input_args = [
    'image_lr', 'crops_image_hr', 'depth_gt',
    'crop_depths', 'bboxs', 'image_hr',
]
# Global run switches.
convert_syncbn = True
debug = False

# Runtime environment options: cuDNN benchmark flag, distributed backend
# and the multiprocessing start method.
env_cfg = {
    'cudnn_benchmark': True,
    'dist_cfg': {'backend': 'nccl'},
    'mp_cfg': {'mp_start_method': 'forkserver'},
}

# NOTE(review): name matches the DistributedDataParallel argument of the
# same name -- presumably forwarded there; confirm in the launcher code.
find_unused_parameters = True
# Dataloader over a plain image dataset. The dataset name and RGB directory
# are empty strings and gt_dir is None here -- presumably filled in at run
# time; verify against the caller.
general_dataloader = {
    'batch_size': 1,
    'dataset': {
        'dataset_name': '',
        'gt_dir': None,
        'rgb_image_dir': '',
        'type': 'ImageDataset',
    },
    'num_workers': 2,
}
# Job launcher identifier and the name used for this run's log.
launcher = 'pytorch'
log_name = 'fine_pretrain'

# Depth range shared across this config: the same 80 / 0.001 pair is
# repeated in the model and in every dataset section of this file.
max_depth = 80
min_depth = 0.001
# Model definition of type 'BaselinePretrain' with target 'fine'.
# The coarse_branch and fine_branch sub-configs below are identical in this
# file; both are spelled out in full so each stays an independent object.
model = {
    'coarse_branch': {
        'attractor_alpha': 1000,
        'attractor_gamma': 2,
        'attractor_kind': 'mean',
        'attractor_type': 'inv',
        'aug': True,
        'bin_centers_type': 'softplus',
        'bin_embedding_dim': 128,
        'clip_grad': 0.1,
        'dataset': 'nyu',
        'depth_anything': True,
        'distributed': True,
        'do_resize': False,
        'force_keep_ar': True,
        'freeze_midas_bn': True,
        'gpu': 'NULL',
        'img_size': [392, 518],
        'inverse_midas': False,
        'log_images_every': 0.1,
        'max_depth': 80,
        'max_temp': 50.0,
        'max_translation': 100,
        'memory_efficient': True,
        'midas_model_type': 'vitb',
        'min_depth': 0.001,
        'min_temp': 0.0212,
        'model': 'zoedepth',
        'n_attractors': [16, 8, 4, 1],
        'n_bins': 64,
        'name': 'ZoeDepth',
        'notes': '',
        'output_distribution': 'logbinomial',
        'prefetch': False,
        'pretrained_resource': 'local::./work_dir/DepthAnything_vitb.pt',
        'print_losses': False,
        'project': 'ZoeDepth',
        'random_crop': False,
        'random_translate': False,
        'root': '.',
        'save_dir': '',
        'shared_dict': 'NULL',
        'tags': '',
        'train_midas': True,
        'translate_prob': 0.2,
        'type': 'DA-ZoeDepth',
        'uid': 'NULL',
        'use_amp': False,
        'use_pretrained_midas': True,
        'use_shared_dict': False,
        'validate_every': 0.25,
        'version_name': 'v1',
        'workers': 16,
    },
    'fine_branch': {
        'attractor_alpha': 1000,
        'attractor_gamma': 2,
        'attractor_kind': 'mean',
        'attractor_type': 'inv',
        'aug': True,
        'bin_centers_type': 'softplus',
        'bin_embedding_dim': 128,
        'clip_grad': 0.1,
        'dataset': 'nyu',
        'depth_anything': True,
        'distributed': True,
        'do_resize': False,
        'force_keep_ar': True,
        'freeze_midas_bn': True,
        'gpu': 'NULL',
        'img_size': [392, 518],
        'inverse_midas': False,
        'log_images_every': 0.1,
        'max_depth': 80,
        'max_temp': 50.0,
        'max_translation': 100,
        'memory_efficient': True,
        'midas_model_type': 'vitb',
        'min_depth': 0.001,
        'min_temp': 0.0212,
        'model': 'zoedepth',
        'n_attractors': [16, 8, 4, 1],
        'n_bins': 64,
        'name': 'ZoeDepth',
        'notes': '',
        'output_distribution': 'logbinomial',
        'prefetch': False,
        'pretrained_resource': 'local::./work_dir/DepthAnything_vitb.pt',
        'print_losses': False,
        'project': 'ZoeDepth',
        'random_crop': False,
        'random_translate': False,
        'root': '.',
        'save_dir': '',
        'shared_dict': 'NULL',
        'tags': '',
        'train_midas': True,
        'translate_prob': 0.2,
        'type': 'DA-ZoeDepth',
        'uid': 'NULL',
        'use_amp': False,
        'use_pretrained_midas': True,
        'use_shared_dict': False,
        'validate_every': 0.25,
        'version_name': 'v1',
        'workers': 16,
    },
    'max_depth': 80,
    'min_depth': 0.001,
    # Kept as a tuple (not a list), matching the 392x518 img_size above.
    'patch_process_shape': (392, 518),
    'sigloss': {'type': 'SILogLoss'},
    'target': 'fine',
    'type': 'BaselinePretrain',
}
# Optimizer settings: AdamW with L2-norm gradient clipping at 0.1.
# custom_keys is empty, so no per-parameter overrides are declared here.
optim_wrapper = {
    'clip_grad': {'max_norm': 0.1, 'norm_type': 2, 'type': 'norm'},
    'optimizer': {'lr': 4e-06, 'type': 'AdamW', 'weight_decay': 0.01},
    'paramwise_cfg': {'bypass_duplicate': True, 'custom_keys': {}},
}
# Learning-rate / momentum schedule parameters.
# NOTE(review): the key names match the keyword arguments of
# torch.optim.lr_scheduler.OneCycleLR -- presumably forwarded to it; no
# scheduler 'type' is given in this section, so confirm in the trainer.
param_scheduler = {
    'base_momentum': 0.85,
    'cycle_momentum': True,
    'div_factor': 1,
    'final_div_factor': 10000,
    'max_momentum': 0.95,
    'pct_start': 0.5,
    'three_phase': False,
}
# Project name, resume switch and free-form tags for this run.
project = 'patchfusion'
resume = False
tags = ['fine', 'da', 'vitb']
# Test dataloader reading the split file ./data/u4k/splits/test.txt.
# NOTE(review): network_process_size is 384x512 here and no resize_mode is
# set, whereas train/val in this file use 392x518 with 'depth-anything' --
# confirm this difference is intended.
test_in_dataloader = {
    'batch_size': 1,
    'dataset': {
        'data_root': './data/u4k',
        'max_depth': 80,
        'min_depth': 0.001,
        'mode': 'infer',
        'split': './data/u4k/splits/test.txt',
        'transform_cfg': {'network_process_size': [384, 512]},
        'type': 'UnrealStereo4kDataset',
    },
    'num_workers': 2,
}
# Test dataloader reading the split file ./data/u4k/splits/test_out.txt.
# NOTE(review): network_process_size is 384x512 here and no resize_mode is
# set, whereas train/val in this file use 392x518 with 'depth-anything' --
# confirm this difference is intended.
test_out_dataloader = {
    'batch_size': 1,
    'dataset': {
        'data_root': './data/u4k',
        'max_depth': 80,
        'min_depth': 0.001,
        'mode': 'infer',
        'split': './data/u4k/splits/test_out.txt',
        'transform_cfg': {'network_process_size': [384, 512]},
        'type': 'UnrealStereo4kDataset',
    },
    'num_workers': 2,
}
# Training-loop schedule: epoch count plus logging, validation and
# checkpoint intervals. val_type is 'epoch_base', so val_interval is
# presumably counted in epochs -- verify against the trainer.
train_cfg = {
    'eval_start': 0,
    'log_interval': 100,
    'max_epochs': 24,
    'save_checkpoint_interval': 24,
    'train_log_img_interval': 500,
    'val_interval': 2,
    'val_log_img_interval': 50,
    'val_type': 'epoch_base',
}
# Training dataloader over the split ./data/u4k/splits/train.txt, with
# random cropping enabled.
train_dataloader = {
    'batch_size': 4,
    'dataset': {
        'data_root': './data/u4k',
        'max_depth': 80,
        'min_depth': 0.001,
        'mode': 'train',
        'resize_mode': 'depth-anything',
        'split': './data/u4k/splits/train.txt',
        'transform_cfg': {
            'degree': 1.0,
            'network_process_size': [392, 518],
            'random_crop': True,
            # Kept as a tuple, unlike network_process_size which is a list.
            'random_crop_size': (540, 960),
        },
        'type': 'UnrealStereo4kDataset',
    },
    'num_workers': 4,
}
# Validation dataloader over the split ./data/u4k/splits/val.txt.
# It runs in 'infer' mode and, unlike the training loader, sets no
# random_crop flag.
val_dataloader = {
    'batch_size': 1,
    'dataset': {
        'data_root': './data/u4k',
        'max_depth': 80,
        'min_depth': 0.001,
        'mode': 'infer',
        'resize_mode': 'depth-anything',
        'split': './data/u4k/splits/val.txt',
        'transform_cfg': {
            'degree': 1.0,
            'network_process_size': [392, 518],
            # Kept as a tuple, unlike network_process_size which is a list.
            'random_crop_size': (540, 960),
        },
        'type': 'UnrealStereo4kDataset',
    },
    'num_workers': 2,
}
# Output directory for this run.
work_dir = './work_dir/depthanything_vitb_u4k/fine_pretrain'

# Stand-alone copy of the 'DA-ZoeDepth' settings; the values are the same
# as the coarse_branch / fine_branch entries of `model` in this file.
zoe_depth_config = {
    'attractor_alpha': 1000,
    'attractor_gamma': 2,
    'attractor_kind': 'mean',
    'attractor_type': 'inv',
    'aug': True,
    'bin_centers_type': 'softplus',
    'bin_embedding_dim': 128,
    'clip_grad': 0.1,
    'dataset': 'nyu',
    'depth_anything': True,
    'distributed': True,
    'do_resize': False,
    'force_keep_ar': True,
    'freeze_midas_bn': True,
    'gpu': 'NULL',
    'img_size': [392, 518],
    'inverse_midas': False,
    'log_images_every': 0.1,
    'max_depth': 80,
    'max_temp': 50.0,
    'max_translation': 100,
    'memory_efficient': True,
    'midas_model_type': 'vitb',
    'min_depth': 0.001,
    'min_temp': 0.0212,
    'model': 'zoedepth',
    'n_attractors': [16, 8, 4, 1],
    'n_bins': 64,
    'name': 'ZoeDepth',
    'notes': '',
    'output_distribution': 'logbinomial',
    'prefetch': False,
    'pretrained_resource': 'local::./work_dir/DepthAnything_vitb.pt',
    'print_losses': False,
    'project': 'ZoeDepth',
    'random_crop': False,
    'random_translate': False,
    'root': '.',
    'save_dir': '',
    'shared_dict': 'NULL',
    'tags': '',
    'train_midas': True,
    'translate_prob': 0.2,
    'type': 'DA-ZoeDepth',
    'uid': 'NULL',
    'use_amp': False,
    'use_pretrained_midas': True,
    'use_shared_dict': False,
    'validate_every': 0.25,
    'version_name': 'v1',
    'workers': 16,
}