#!/usr/bin/env bash
#
# Launch main.py through PyTorch's distributed launcher for the experiment
# named "Fine_tune_2" (exp_id v01).
#
# `--nproc_per_node=4` is an option of torch.distributed.launch itself and
# asks it to start 4 worker processes on this node. Every option after
# `main.py` is forwarded to main.py unchanged; what each one means is defined
# by main.py's argument parser, which is not part of this file.
#
# Usage: run from the directory that contains main.py.
#
# NOTE(review): the spellings `--mask_stratege` and `--ernie_stratege`
# presumably match the option names declared in main.py -- do not "correct"
# them here without changing main.py as well.
# NOTE(review): newer PyTorch releases deprecate torch.distributed.launch in
# favour of torchrun. torchrun hands the local rank to the script through the
# LOCAL_RANK environment variable instead of a --local_rank argument, so
# confirm how main.py reads its rank before switching launchers.
#
# The last argument line intentionally carries no trailing backslash: a
# dangling continuation would splice whatever line is appended after it into
# this command as extra arguments.
python -m torch.distributed.launch --nproc_per_node=4 main.py \
  --LLRD 1 \
  --eval_step 10 \
  --save_model 1 \
  --mask_stratege wwm \
  --batch_size 64 \
  --batch_size_ke 64 \
  --exp_name Fine_tune_2 \
  --exp_id v01 \
  --workers 8 \
  --use_NumEmb 1 \
  --seq_data_name Seq_data_RuAlmEntKpiTbwDoc \
  --maxlength 256 \
  --lr 4e-5 \
  --ke_lr 8e-5 \
  --train_strategy 2 \
  --model_name TeleBert2 \
  --train_ratio 1 \
  --save_pretrain 0 \
  --dist 1 \
  --accumulation_steps 8 \
  --accumulation_steps_ke 6 \
  --special_token_mask 0 \
  --freeze_layer 0 \
  --ernie_stratege -1 \
  --mlm_probability_increase curve \
  --use_kpi_loss 1 \
  --mlm_probability 0.4 \
  --use_awl 1 \
  --cls_head_init 1 \
  --emb_init 0 \
  --final_mlm_probability 0.4 \
  --ke_dim 256 \
  --plm_emb_type cls \
  --train_together 0