# File: RAR/configs/infer/titok_b64.yaml
# Page metadata captured with the file: uploader "yucornetto",
# commit message "upload", commit 51ce47d (verified).
---
# Inference configuration: checkpoint file names and output directory,
# tokenizer (vq_model) and generator hyper-parameters, and dataset
# preprocessing.
#
# NOTE(review): the nesting below was reconstructed after indentation was
# lost. The path model -> vq_model -> num_latent_tokens is fixed by the
# interpolation in generator.image_seq_len; the placement of every other key
# is assumed from the order of the section headers — confirm against the
# code that consumes this file.

experiment:
  tokenizer_checkpoint: "tokenizer_titok_b64.bin"
  generator_checkpoint: "generator_titok_b64.bin"
  output_dir: "titok_b_64"

model:
  vq_model:
    codebook_size: 4096
    token_size: 12
    use_l2_norm: true
    commitment_cost: 0.25
    # vit arch
    vit_enc_model_size: "base"
    vit_dec_model_size: "base"
    vit_enc_patch_size: 16
    vit_dec_patch_size: 16
    num_latent_tokens: 64
    finetune_decoder: true

  generator:
    model_type: "ViT"
    hidden_size: 768
    num_hidden_layers: 24
    num_attention_heads: 16
    intermediate_size: 3072
    dropout: 0.1
    attn_drop: 0.1
    num_steps: 8
    class_label_dropout: 0.1
    # Interpolated reference to model.vq_model.num_latent_tokens (64 above).
    # Presumably resolved by an OmegaConf-style loader; a plain YAML parser
    # keeps this as the literal string — verify against the loader in use.
    image_seq_len: ${model.vq_model.num_latent_tokens}
    condition_num_classes: 1000
    # sampling hyper-params
    # NOTE(review): assumed to belong to `generator` because they follow its
    # keys without a new section header — confirm.
    randomize_temperature: 11.0
    guidance_scale: 3.0
    guidance_decay: "linear"

dataset:
  preprocessing:
    crop_size: 256