{
  "_class_name": "ConsisIDTransformer3DModel",
  "_diffusers_version": "0.31.0.dev0",
  "activation_fn": "gelu-approximate",
  "attention_bias": true,
  "attention_head_dim": 64,
  "cross_attn_interval": 2,
  "dropout": 0.0,
  "flip_sin_to_cos": true,
  "freq_shift": 0,
  "in_channels": 32,
  "is_kps": false,
  "is_train_face": true,
  "LFE_heads": 12,
  "LFE_num_tokens": 32,
  "LFE_output_dim": 768,
  "local_face_scale": 1.0,
  "max_text_seq_length": 226,
  "norm_elementwise_affine": true,
  "norm_eps": 1e-05,
  "num_attention_heads": 48,
  "num_layers": 42,
  "out_channels": 16,
  "patch_size": 2,
  "sample_frames": 49,
  "sample_height": 60,
  "sample_width": 90,
  "spatial_interpolation_scale": 1.875,
  "temporal_compression_ratio": 4,
  "temporal_interpolation_scale": 1.0,
  "text_embed_dim": 4096,
  "time_embed_dim": 512,
  "timestep_activation_fn": "silu",
  "use_learned_positional_embeddings": true,
  "use_rotary_positional_embeddings": true
}