experiment:
    tokenizer_checkpoint: "tokenizer_titok_b64.bin"
    generator_checkpoint: "generator_titok_b64.bin"
    output_dir: "titok_b_64"

model:
    vq_model:
        codebook_size: 4096
        token_size: 12
        use_l2_norm: True
        commitment_cost: 0.25
        # vit arch
        vit_enc_model_size: "base"
        vit_dec_model_size: "base"
        vit_enc_patch_size: 16
        vit_dec_patch_size: 16
        num_latent_tokens: 64
        finetune_decoder: True
    generator:
        model_type: "ViT"
        hidden_size: 768
        num_hidden_layers: 24
        num_attention_heads: 16
        intermediate_size: 3072
        dropout: 0.1
        attn_drop: 0.1
        num_steps: 8
        class_label_dropout: 0.1
        image_seq_len: ${model.vq_model.num_latent_tokens}
        condition_num_classes: 1000
        # sampling hyper-params
        randomize_temperature: 11.0
        guidance_scale: 3.0
        guidance_decay: "linear"

dataset:
    preprocessing:
        crop_size: 256
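The `${model.vq_model.num_latent_tokens}` entry is an OmegaConf-style interpolation, so `image_seq_len` resolves to 64 when the config is loaded. Below is a minimal sketch of loading and resolving the config with OmegaConf; the file name `titok_b_64.yaml` is an assumption for illustration, not part of the original config.

# Minimal sketch: load the config above with OmegaConf and resolve the
# ${model.vq_model.num_latent_tokens} interpolation.
# Assumption: the config is saved locally as "titok_b_64.yaml".
from omegaconf import OmegaConf

config = OmegaConf.load("titok_b_64.yaml")

# Interpolations resolve on access: image_seq_len follows
# model.vq_model.num_latent_tokens, i.e. 64 latent tokens per image.
print(config.model.generator.image_seq_len)   # -> 64
print(config.model.vq_model.codebook_size)    # -> 4096

# Convert to a plain dict with all interpolations resolved.
resolved = OmegaConf.to_container(config, resolve=True)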