File size: 1,604 Bytes
68db296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# Model definition: `target` is the dotted path of the class and `params`
# holds its settings.
# NOTE(review): `params` is presumably forwarded to the target's constructor
# as keyword arguments — confirm against the config loader.
model:
  target: hy3dgen.shapegen.models.Hunyuan3DDiT
  params:
    # Same value as vae.params.embed_dim (64) in this file.
    # NOTE(review): presumably these must stay in sync — confirm.
    in_channels: 64
    # Same value as the image encoder's config.hidden_size (1536) in this
    # file. NOTE(review): presumably the conditioning feature width — confirm.
    context_in_dim: 1536
    hidden_size: 1024
    mlp_ratio: 4.0
    num_heads: 16
    depth: 16
    depth_single_blocks: 32
    # hidden_size / num_heads = 1024 / 16 = 64, the single entry below.
    # NOTE(review): presumably the per-head width split over positional
    # axes — confirm before changing hidden_size or num_heads.
    axes_dim: [64]
    theta: 10000
    # Canonical lowercase boolean, matching the other `qkv_bias` keys in
    # this file.
    qkv_bias: true

# VAE definition: `target` is the dotted path of the class and `params`
# holds its settings.
# NOTE(review): keys below are grouped by apparent purpose, inferred from
# their names only; YAML mapping order itself carries no meaning.
vae:
  target: hy3dgen.shapegen.models.ShapeVAE
  params:
    # Latent sizing. embed_dim has the same value as model.params.in_channels
    # (64) in this file — presumably they must agree; confirm.
    embed_dim: 64
    num_latents: 3072
    scale_factor: 0.9990943042622529

    # NOTE(review): presumably positional-embedding options — confirm.
    num_freqs: 8
    include_pi: false

    # Network sizing and attention switches.
    width: 1024
    heads: 16
    num_decoder_layers: 16
    qkv_bias: false
    qk_norm: true

# Image conditioner: `target` is the dotted path of the wrapper class, and
# its `params` declare a single encoder under `main_image_encoder`.
conditioner:
  target: hy3dgen.shapegen.models.SingleImageEncoder
  params:
    main_image_encoder:
      type: DinoImageEncoder # dino giant
      kwargs:
        # Encoder architecture settings (see `model_type: dinov2` below).
        # NOTE(review): the alphabetical keys look like a Hugging Face
        # DINOv2 config dump — confirm before renaming or reordering them.
        config:
          attention_probs_dropout_prob: 0.0
          drop_path_rate: 0.0
          hidden_act: gelu
          hidden_dropout_prob: 0.0
          # Same value as model.params.context_in_dim (1536) in this file.
          hidden_size: 1536
          # 518 = 37 * patch_size (14); same value as kwargs.image_size below.
          image_size: 518
          initializer_range: 0.02
          # Keep the dot and the signed exponent: YAML 1.1 loaders such as
          # PyYAML resolve `1e-6` (no dot) as a string, not a float.
          layer_norm_eps: 1.e-6
          layerscale_value: 1.0
          mlp_ratio: 4
          model_type: dinov2
          num_attention_heads: 24
          num_channels: 3
          num_hidden_layers: 40
          patch_size: 14
          qkv_bias: true
          torch_dtype: float32
          use_swiglu_ffn: true
        # Sibling of `config` (not a duplicate key); same value as
        # config.image_size above.
        # NOTE(review): image_processor.params.size is 512, not 518 —
        # presumably the encoder resizes its input; confirm.
        image_size: 518

# Scheduler definition: `target` is the dotted path of the class and
# `params` holds its settings.
scheduler:
  target: hy3dgen.shapegen.schedulers.FlowMatchEulerDiscreteScheduler
  params:
    num_train_timesteps: 1000

# Input image preprocessor definition: `target` is the dotted path of the
# class and `params` holds its settings.
image_processor:
  target: hy3dgen.shapegen.preprocessors.ImageProcessorV2
  params:
    # NOTE(review): presumably the output side length in pixels — confirm.
    size: 512
    # NOTE(review): presumably the fraction of the frame left as margin
    # around the subject — confirm against ImageProcessorV2.
    border_ratio: 0.15

# Pipeline class, given by dotted path. Unlike the other stanzas in this
# file, this entry has a `target` only and no `params` mapping.
pipeline:
  target: hy3dgen.shapegen.pipelines.Hunyuan3DDiTFlowMatchingPipeline