{ "model_path": "/mnt/10tb/home/shmelev/dnalm/downstream_tasks/APARENT/bert_base_sparse_rope_4096_bs256_lr_5e-05_wd0.01_fp16_from_425k/", "log_interval": 250, "valid_interval": 1000, "save_interval": null, "save_best": true, "use_generate_on_valid": false, "init_checkpoint": "/mnt/10tb/home/shmelev/dnalm/downstream_tasks/APARENT/bert_base_sparse_rope_4096_bs256_lr_5e-05_wd0.01_fp16_from_425k/model_best_from_s3.pth", "skip_used_data": false, "reset_lr": true, "reset_iteration": true, "reset_optimizer": true, "lr": 5e-05, "batch_size": 32, "iters": 500000, "gradient_accumulation_steps": 1, "fp16": true, "fp16_allreduce": false, "apex_opt_lvl": "O2", "min_loss_scale": null, "max_loss_scale": 16777216, "clip_grad_norm": null, "clip_grad_value": null, "early_stopping_patience": null, "lr_scheduler": "constant_with_warmup", "num_warmup_steps": 1500, "num_training_steps": null, "use_lr_drop": false, "lr_drop_factor": 0.1, "lr_drop_patience": 10, "lr_drop_threshold": 0.0001, "lr_drop_threshold_mode": "rel", "lr_drop_cooldown": 0, "lr_drop_min_lr": 0.0, "lr_drop_eps": 1e-08, "optimize_metric": "pearsonr2", "optimize_mode": "max", "train_csv": "/mnt/10tb/home/shmelev/dnalm/downstream_tasks/APARENT/dataset_itself/APARENT_train.csv", "test_csv": "/mnt/10tb/home/shmelev/dnalm/downstream_tasks/APARENT/dataset_itself/APARENT_test.csv", "seed": 42, "input_seq_len": 256, "data_n_workers": 2, "model_cfg": "/mnt/10tb/home/shmelev/dnalm/data/configs/L12-H768-A12-V32k-L4096-preln-sparse-rope.json", "model_cls": "src.gena_lm.modeling_bert:BertForAPARENTSequenceRegression", "tokenizer": "/mnt/10tb/home/shmelev/dnalm/data/tokenizers/human/BPE_32k/", "optimizer": "AdamW", "weight_decay": 0.0001, "ENV": { "CUDA_VISIBLE_DEVICES": "0" }, "HVD_INIT": true, "HVD_SIZE": 1, "MACHINE": "bio-protein", "COMMIT": "2513d0c4ee891e7bcd224b9c65e88e27e0fd1892" }