{
  "audio_drop_path": 0.1,
  "audio_embed_dim": 768,
  "audio_kernel_size": 16,
  "audio_num_blocks": 12,
  "audio_num_heads": 12,
  "audio_num_mel_bins": 128,
  "audio_stride": 10,
  "audio_target_len": 204,
  "depth_drop_path": 0.0,
  "depth_embed_dim": 384,
  "depth_kernel_size": 16,
  "depth_num_blocks": 12,
  "depth_num_heads": 8,
  "imu_drop_path": 0.7,
  "imu_embed_dim": 512,
  "imu_kernel_size": 8,
  "imu_num_blocks": 6,
  "imu_num_heads": 8,
  "kernel_size": [
    2,
    14,
    14
  ],
  "out_embed_dim": 1024,
  "text_embed_dim": 1024,
  "text_num_blocks": 24,
  "text_num_heads": 16,
  "thermal_drop_path": 0.0,
  "thermal_embed_dim": 768,
  "thermal_kernel_size": 16,
  "thermal_num_blocks": 12,
  "thermal_num_heads": 12,
  "video_frames": 2,
  "vision_embed_dim": 1280,
  "vision_num_blocks": 32,
  "vision_num_heads": 16
}