{
  "_name_or_path": "logs/checkpoint",
  "action_downsample": 1,
  "action_fps": 15,
  "action_option": "dense",
  "architectures": [
    "DVGFormerModel"
  ],
  "backbone_downsample": 14,
  "cropped_sensor_width": 36.0,
  "drone_types": [
    0,
    1
  ],
  "fix_image_width": true,
  "focal_alpha": 0.9,
  "fps": 3,
  "fps_downsample": 5,
  "gpt2_config": {
    "action_downsample": 1,
    "action_fps": 15,
    "architectures": [
      "UAVPoseNetModel"
    ],
    "backbone_downsample": 14,
    "cropped_sensor_width": 36.0,
    "fps_downsample": 5,
    "hdf5_fname": "dataset_full.h5",
    "image_resolution": [
      168,
      294
    ],
    "model_type": "gpt2",
    "n_action_to_predict": 5,
    "n_embd": 384,
    "n_head": 6,
    "n_positions": 1562,
    "n_token_image": 45,
    "n_token_to_predict": 5,
    "n_token_total": 52,
    "per_token_preds": 1,
    "root": "youtube_drone_videos",
    "torch_dtype": "bfloat16",
    "vision_feat_dim": 384
  },
  "hdf5_fname": "dataset_full.h5",
  "hidden_size": 384,
  "ignore_value": -100,
  "image_featmap_shape": [
    5,
    9
  ],
  "image_resolution": [
    168,
    294
  ],
  "loss_coef_action": 1,
  "loss_coef_drone_type": 0,
  "loss_coef_future": 0,
  "loss_coef_state": 0,
  "loss_coef_stop": 0,
  "max_model_frames": 150,
  "model_type": "dvgformer",
  "motion_option": "local",
  "n_action_to_predict": 5,
  "n_future_frames": 15,
  "n_token_action": 1,
  "n_token_boa": 1,
  "n_token_drone_type": 1,
  "n_token_frame": 52,
  "n_token_image": 45,
  "n_token_noise": 1,
  "n_token_predict": 5,
  "n_token_prepend": 2,
  "n_token_quality": 0,
  "n_token_state": 1,
  "n_token_to_predict": 5,
  "n_token_total": 52,
  "num_quantile_bins": 10,
  "pad_side": "right",
  "pad_token_value": 0,
  "per_token_preds": 1,
  "prediction_option": "iterative",
  "root": "youtube_drone_videos",
  "test_gt_forcing": "allframe",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.45.2",
  "use_depth": true,
  "use_quality_mlps": false,
  "vision_backbone": "dinov2_vits14_reg",
  "vision_feat_dim": 384
}
|