{"device": "cuda", "dtype": "float32", "autocast": true, "autocast_dtype": "float16", "seed": 2036, "show": false, "continue_from": "//pretrained/facebook/musicgen-small", "execute_only": "None", "execute_inplace": false, "benchmark_no_load": false, "efficient_attention_backend": "torch", "num_threads": 1, "mp_start_method": "forkserver", "label": "None", "logging.level": "INFO", "logging.log_updates": 10, "logging.log_tensorboard": true, "logging.log_wandb": false, "tensorboard.with_media_logging": false, "tensorboard.name": "None", "tensorboard.sub_dir": "None", "wandb.with_media_logging": true, "wandb.project": "None", "wandb.name": "None", "wandb.group": "None", "slurm.gpus": 4, "slurm.mem_per_gpu": 40, "slurm.time": 3600, "slurm.constraint": "None", "slurm.partition": "None", "slurm.comment": "None", "slurm.setup": "[]", "slurm.exclude": "", "dora.dir": "/checkpoint/hari/experiments/audiocraft/outputs", "dora.exclude": "['device', 'wandb.*', 'tensorboard.*', 'logging.*', 'dataset.num_workers', 'eval.num_workers', 'special.*', 'metrics.visqol.bin', 'metrics.fad.bin', 'execute_only', 'execute_best', 'generate.every', 'optim.eager_sync', 'profiler.*', 'deadlock.*', 'efficient_attention_backend', 'num_threads', 'mp_start_method']", "dora.use_rendezvous": false, "dora.git_save": true, "datasource.max_sample_rate": 44100, "datasource.max_channels": 2, "datasource.train": "/kaggle/working/audiocraft/egs/folk", "datasource.valid": "/kaggle/working/audiocraft/egs/folk", "datasource.evaluate": "/kaggle/working/audiocraft/egs/folk", "datasource.generate": "/kaggle/working/audiocraft/egs/folk", "solver": "musicgen", "fsdp.use": false, "fsdp.param_dtype": "float16", "fsdp.reduce_dtype": "float32", "fsdp.buffer_dtype": "float32", "fsdp.sharding_strategy": "shard_grad_op", "fsdp.per_block": true, "profiler.enabled": false, "deadlock.use": true, "deadlock.timeout": 600, "dataset.batch_size": 2, "dataset.num_workers": 2, "dataset.segment_duration": 30, "dataset.num_samples": "None", "dataset.return_info": true, "dataset.shuffle": false, "dataset.sample_on_duration": false, "dataset.sample_on_weight": false, "dataset.min_segment_ratio": 0.8, "dataset.train.num_samples": 1000000, "dataset.train.shuffle": true, "dataset.train.shuffle_seed": 0, "dataset.train.permutation_on_files": false, "dataset.train.merge_text_p": 0.25, "dataset.train.drop_desc_p": 0.5, "dataset.train.drop_other_p": 0.5, "dataset.valid.num_samples": 1, "dataset.evaluate.num_samples": 10000, "dataset.generate.num_samples": 50, "dataset.generate.return_info": true, "checkpoint.save_last": true, "checkpoint.save_every": 50, "checkpoint.keep_last": 10, "checkpoint.keep_every_states": "None", "generate.every": 25, "generate.path": "samples", "generate.audio.format": "wav", "generate.audio.strategy": "loudness", "generate.audio.sample_rate": 32000, "generate.audio.loudness_headroom_db": 14, "generate.lm.use_sampling": true, "generate.lm.temp": 1.0, "generate.lm.top_k": 250, "generate.lm.top_p": 0.0, "generate.lm.prompted_samples": false, "generate.lm.unprompted_samples": true, "generate.lm.gen_gt_samples": true, "generate.lm.prompt_duration": "None", "generate.lm.gen_duration": "None", "generate.lm.remove_prompts": false, "generate.num_workers": 5, "evaluate.every": 25, "evaluate.num_workers": 5, "evaluate.truncate_audio": "None", "evaluate.fixed_generation_duration": "None", "evaluate.metrics.base": false, "evaluate.metrics.fad": false, "evaluate.metrics.kld": false, "evaluate.metrics.text_consistency": false, "evaluate.metrics.chroma_cosine": false, "optim.epochs": 10, "optim.updates_per_epoch": 1000, "optim.lr": 0.0001, "optim.optimizer": "adamw", "optim.adam.betas": "[0.9, 0.95]", "optim.adam.weight_decay": 0.01, "optim.adam.eps": 1e-08, "optim.ema.use": true, "optim.ema.updates": 10, "optim.ema.device": "cuda", "optim.ema.decay": 0.99, "optim.max_norm": 1.0, "optim.eager_sync": true, "schedule.lr_scheduler": "cosine", "schedule.step.step_size": "None", "schedule.step.gamma": "None", "schedule.exponential.lr_decay": "None", "schedule.cosine.warmup": 8, "schedule.cosine.lr_min_ratio": 0.0, "schedule.cosine.cycle_length": 1.0, "schedule.polynomial_decay.warmup": "None", "schedule.polynomial_decay.zero_lr_warmup_steps": 0, "schedule.polynomial_decay.end_lr": 0.0, "schedule.polynomial_decay.power": 1, "schedule.inverse_sqrt.warmup": "None", "schedule.inverse_sqrt.warmup_init_lr": 0.0, "schedule.linear_warmup.warmup": "None", "schedule.linear_warmup.warmup_init_lr": 0.0, "classifier_free_guidance.training_dropout": 0.3, "classifier_free_guidance.inference_coef": 3.0, "fuser.cross_attention_pos_emb": false, "fuser.cross_attention_pos_emb_scale": 1, "fuser.sum": "[]", "fuser.prepend": "[]", "fuser.cross": "['description']", "fuser.input_interpolate": "[]", "conditioners.description.model": "t5", "conditioners.description.t5.name": "t5-base", "conditioners.description.t5.finetune": false, "conditioners.description.t5.word_dropout": 0.3, "conditioners.description.t5.normalize_text": false, "sample_rate": 32000, "channels": 1, "compression_model_checkpoint": "//pretrained/facebook/encodec_32khz", "compression_model_n_q": "None", "tokens.padding_with_special_token": false, "interleave_stereo_codebooks.use": false, "interleave_stereo_codebooks.per_timestep": false, "cache.path": "None", "cache.write": false, "cache.write_shard": 0, "cache.write_num_shards": 1, "metrics.fad.use_gt": false, "metrics.fad.model": "tf", "metrics.fad.tf.bin": "None", "metrics.fad.tf.model_path": "//reference/fad/vggish_model.ckpt", "metrics.kld.use_gt": false, "metrics.kld.model": "passt", "metrics.kld.passt.pretrained_length": 20, "metrics.text_consistency.use_gt": false, "metrics.text_consistency.model": "clap", "metrics.text_consistency.clap.model_path": "//reference/clap/music_audioset_epoch_15_esc_90.14.pt", "metrics.text_consistency.clap.model_arch": "HTSAT-base", "metrics.text_consistency.clap.enable_fusion": false, "metrics.chroma_cosine.use_gt": false, "metrics.chroma_cosine.model": "chroma_base", "metrics.chroma_cosine.chroma_base.sample_rate": 32000, "metrics.chroma_cosine.chroma_base.n_chroma": 12, "metrics.chroma_cosine.chroma_base.radix2_exp": 14, "metrics.chroma_cosine.chroma_base.argmax": true, "lm_model": "transformer_lm", "codebooks_pattern.modeling": "delay", "codebooks_pattern.delay.delays": "[0, 1, 2, 3]", "codebooks_pattern.delay.flatten_first": 0, "codebooks_pattern.delay.empty_initial": 0, "codebooks_pattern.unroll.flattening": "[0, 1, 2, 3]", "codebooks_pattern.unroll.delays": "[0, 0, 0, 0]", "codebooks_pattern.music_lm.group_by": 2, "codebooks_pattern.coarse_first.delays": "[0, 0, 0]", "transformer_lm.dim": 1024, "transformer_lm.num_heads": 16, "transformer_lm.num_layers": 24, "transformer_lm.hidden_scale": 4, "transformer_lm.n_q": 4, "transformer_lm.card": 2048, "transformer_lm.dropout": 0.0, "transformer_lm.emb_lr": "None", "transformer_lm.activation": "gelu", "transformer_lm.norm_first": true, "transformer_lm.bias_ff": false, "transformer_lm.bias_attn": false, "transformer_lm.bias_proj": false, "transformer_lm.past_context": "None", "transformer_lm.causal": true, "transformer_lm.custom": false, "transformer_lm.memory_efficient": true, "transformer_lm.attention_as_float32": false, "transformer_lm.layer_scale": "None", "transformer_lm.positional_embedding": "sin", "transformer_lm.xpos": false, "transformer_lm.checkpointing": "none", "transformer_lm.weight_init": "gaussian", "transformer_lm.depthwise_init": "current", "transformer_lm.zero_bias_init": true, "transformer_lm.norm": "layer_norm", "transformer_lm.cross_attention": false, "transformer_lm.qk_layer_norm": false, "transformer_lm.qk_layer_norm_cross": false, "transformer_lm.attention_dropout": "None", "transformer_lm.kv_repeat": 1, "transformer_lm.two_step_cfg": false} |