# Component configuration. Each top-level section names a class by dotted
# path (`target`) and, where present, a `params` mapping for it.
# NOTE(review): params are presumably passed as keyword arguments when the
# target is instantiated -- confirm against the config loader.

# Diffusion transformer.
model:
  target: hy3dgen.shapegen.models.Hunyuan3DDiT
  params:
    in_channels: 64        # same value as vae.params.embed_dim below
    context_in_dim: 1536   # same value as the image encoder's hidden_size below
    hidden_size: 1024
    mlp_ratio: 4.0
    num_heads: 16
    depth: 16
    depth_single_blocks: 32
    axes_dim: [ 64 ]       # 64 == hidden_size / num_heads (1024 / 16)
    theta: 10000
    qkv_bias: true

# Shape VAE.
vae:
  target: hy3dgen.shapegen.models.ShapeVAE
  params:
    num_latents: 3072
    embed_dim: 64
    num_freqs: 8
    include_pi: false
    heads: 16
    width: 1024
    num_decoder_layers: 16
    qkv_bias: false
    qk_norm: true
    scale_factor: 0.9990943042622529

# Image conditioner.
conditioner:
  target: hy3dgen.shapegen.models.SingleImageEncoder
  params:
    main_image_encoder:
      type: DinoImageEncoder # dino giant
      kwargs:
        # NOTE(review): these keys appear to mirror a Hugging Face DINOv2
        # model config (see `model_type`) -- confirm against the encoder.
        config:
          attention_probs_dropout_prob: 0.0
          drop_path_rate: 0.0
          hidden_act: gelu
          hidden_dropout_prob: 0.0
          hidden_size: 1536
          image_size: 518    # 518 == 37 * patch_size (14)
          initializer_range: 0.02
          # Keep the decimal point: YAML 1.1 loaders such as PyYAML only
          # resolve exponent notation to a float when a "." is present
          # (a bare 1e-6 would load as a string).
          layer_norm_eps: 1.e-6
          layerscale_value: 1.0
          mlp_ratio: 4
          model_type: dinov2
          num_attention_heads: 24
          num_channels: 3
          num_hidden_layers: 40
          patch_size: 14
          qkv_bias: true
          torch_dtype: float32
          use_swiglu_ffn: true
        # Separate from config.image_size above; a sibling of `config`.
        image_size: 518

# Sampling scheduler.
scheduler:
  target: hy3dgen.shapegen.schedulers.FlowMatchEulerDiscreteScheduler
  params:
    num_train_timesteps: 1000

# Input image preprocessing.
image_processor:
  target: hy3dgen.shapegen.preprocessors.ImageProcessorV2
  params:
    size: 512
    border_ratio: 0.15

# Pipeline class; no params are given for it in this file.
pipeline:
  target: hy3dgen.shapegen.pipelines.Hunyuan3DDiTFlowMatchingPipeline