# Web-viewer residue, not part of the configuration:
#   File size: 4,953 Bytes
#   c549860
# (The viewer's line-number gutter, 1 through 192, was dropped.)
# Engine configuration (presumably DeepSpeed, judging by the key names).
# Values written as ${...} are interpolations of top-level keys of this file.
# NOTE(review): the nesting below was reconstructed from key semantics because
# the source had lost its indentation -- verify against the consumer's schema.
ds_cfg:
  train_micro_batch_size_per_gpu: ${per_gpu_train_batch_size}
  gradient_accumulation_steps: ${gradient_accumulation_steps}
  scheduler:
    type: WarmupDecayLR
    params:
      # NOTE(review): literal step count, unlike the interpolated values
      # around it; confirm it stays in sync with total_dataset_len, the batch
      # settings and num_train_epochs.
      total_num_steps: 1688
      warmup_max_lr: ${learning_rate}
      warmup_num_steps: 200
      warmup_type: linear
  optimizer:
    type: AdamW
    params:
      lr: ${learning_rate}
      betas:
        - 0.9
        - 0.95
      eps: 1.0e-06
      weight_decay: ${weight_decay}
  bf16:
    enabled: true
  zero_optimization:
    stage: 1
    # The stage3_* values are present even though stage is 1.
    stage3_param_persistence_threshold: 100000.0
    stage3_max_live_parameters: 100000000.0
    stage3_prefetch_bucket_size: 100000000.0
    memory_efficient_linear: false
  steps_per_print: 25
  gradient_clipping: 1.0
  prescale_gradients: false
# Base checkpoint directory; the paths below are interpolated from it.
# Note: it ends with '/', so '${sft_model_dir}/...' expands with a double slash.
sft_model_dir: 'experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/'
fix_hack_data_dir: '${sft_model_dir}/fix_hack_data_dir/'
attempt_response_file: '${sft_model_dir}/react-inter-states/process-rm/logiqav2-train.qa.react.v1.0.0shot.sample10.inter_ver2.0.rs0.2.r0.3.sample3.json'

# The train path contains the pattern '[1-2]-of-20'; the dev path names '0-of-20'.
# NOTE(review): presumably the pattern is glob-expanded by the reader -- confirm.
inter_states_file_train: '${sft_model_dir}/react-inter-states/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.[1-2]-of-20.json'
inter_states_file_dev: '${sft_model_dir}/react-inter-states/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.0-of-20.json'

# train_file and dev_file both resolve to the same attempt_response_file.
train_file: '${attempt_response_file}'
dev_file: '${attempt_response_file}'
test_file: null
# Callable spec: `_target_` names the function, `dtype` is passed to it
# (Hydra-style instantiation, presumably). Referenced as ${torch_dtype}.
# Indentation restored; the source had `_target_` and `dtype` at top level.
torch_dtype:
  _target_: general_util.training_utils.return_torch_dtype
  dtype: bfloat16
# Tokenizer construction spec, referenced as ${tokenizer_init}.
# Indentation restored; the source had these keys flattened to top level.
tokenizer_init:
  _target_: general_util.tokenization_utils.init_tokenizer
  # NOTE(review): this literal is identical to the value of sft_model_dir;
  # consider '${sft_model_dir}' so the two cannot drift apart -- confirm that
  # the tokenizer is always meant to follow the model checkpoint.
  tokenizer_path: experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/
  padding_side: left
# Callable spec with no arguments, referenced as ${device_map}.
# Indentation restored; the source had `_target_` at top level.
device_map:
  _target_: models.llama.return_single_device_map
# Model construction spec: `_target_` is a from_pretrained entry point and the
# remaining keys are its keyword arguments (presumably; Hydra-style).
# NOTE(review): nesting reconstructed because the source had lost its
# indentation -- verify against the consumer.
model:
  _target_: models.llama.LlamaModelForSequenceClassification.from_pretrained
  num_labels: 2
  gradient_checkpointing: true
  pad_token_id: 0
  attn_implementation: flash_attention_2
  # Both values below interpolate the top-level specs of the same name.
  torch_dtype: ${torch_dtype}
  device_map: ${device_map}
# Training dataset spec; reads from ${inter_states_file_train}.
# NOTE(review): nesting reconstructed from key semantics because the source had
# lost its indentation. In particular, `template`, `compose_keys` and
# `value_mapping` are assumed to be dataset-level arguments rather than
# children of `reader` -- verify against the dataset class signature.
read_tensor_train:
  _target_: data.general.Attempt2ValueRewardModelingDataset
  max_value: 3
  original_data_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
  original_reader:
    _target_: data.logiqav2.LogicQAReader
    flat_options: true
  instruction:
    _target_: data.prompts.logiqav2.react.prompts.get_prompt
    prompt_name: react_v2
  reader:
    _target_: data.general.PartialTrajAttemptsReader
    partial_traj_file: ${inter_states_file_train}
  # NOTE(review): single line breaks inside a single-quoted scalar fold to one
  # space, so as written this parses as a one-line string. If real newlines
  # were intended (blank lines may have been lost from the source), restore
  # them -- confirm against the original config before training.
  template: 'Context:
    {}
    Question:
    {}
    Options:
    {}
    Thought 1: '
  compose_keys:
    - context
    - question
    - option_list
  value_mapping:
    _target_: data.general.Value2LabelMapping
    # Spelling kept verbatim; presumably a lookup key in the mapping class.
    name: greater_then_one
# Dataset spec that reads from ${inter_states_file_dev}; otherwise it carries
# the same values as read_tensor_train.
# NOTE(review): nesting reconstructed from key semantics because the source had
# lost its indentation. In particular, `template`, `compose_keys` and
# `value_mapping` are assumed to be dataset-level arguments rather than
# children of `reader` -- verify against the dataset class signature.
read_tensor:
  _target_: data.general.Attempt2ValueRewardModelingDataset
  max_value: 3
  original_data_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
  original_reader:
    _target_: data.logiqav2.LogicQAReader
    flat_options: true
  instruction:
    _target_: data.prompts.logiqav2.react.prompts.get_prompt
    prompt_name: react_v2
  reader:
    _target_: data.general.PartialTrajAttemptsReader
    partial_traj_file: ${inter_states_file_dev}
  # NOTE(review): single line breaks inside a single-quoted scalar fold to one
  # space, so as written this parses as a one-line string. If real newlines
  # were intended (blank lines may have been lost from the source), restore
  # them -- confirm against the original config before evaluating.
  template: 'Context:
    {}
    Question:
    {}
    Options:
    {}
    Thought 1: '
  compose_keys:
    - context
    - question
    - option_list
  value_mapping:
    _target_: data.general.Value2LabelMapping
    # Spelling kept verbatim; presumably a lookup key in the mapping class.
    name: greater_then_one
# Two standalone top-level options; both are switched off / unset here.
# NOTE(review): presumably a distributed-loading barrier toggle and an optional
# vocabulary extension -- confirm against the training script.
dist_load_data_barrier: false
extended_vocab: null
# Batch collator spec; it receives the tokenizer built from ${tokenizer_init}.
# NOTE(review): `max_seq_length` is assumed to be a collator argument; the
# source had lost its indentation -- verify against the collator signature.
collator:
  _target_: data.general.Attempt2ValueCollator
  tokenizer: ${tokenizer_init}
  max_seq_length: 4096
# Flat top-level training options. Key order is unchanged; blank lines only
# group related keys. Strings are quoted so their type cannot be misread.

# Data-loader settings.
num_workers: 8
prefetch_factor: 2

# Model source and resume points.
model_name_or_path: '${sft_model_dir}'
pretrain: null
resume: null

# Experiment naming; output_dir is derived from exp_name, which embeds ${seed}.
exp_name: 'llama2.7b.chat.logiqav2.70b-distil.prm.fix_hack.H100.w4.v2.0.s${seed}'
exp_notes: null
output_dir: 'experiments/${exp_name}'

# Run-mode switches.
do_train: true
evaluate_during_training: true
do_eval: false
eval_sub_path: 'checkpoint-*'

# Batch and optimisation hyperparameters. Several are interpolated into ds_cfg
# (per_gpu_train_batch_size, gradient_accumulation_steps, learning_rate,
# weight_decay).
per_gpu_train_batch_size: 4
per_gpu_eval_batch_size: 12
learning_rate: 1.0e-06
gradient_accumulation_steps: 4
weight_decay: 0.01
adam_epsilon: 1.0e-06
# NOTE(review): this is a string, and its second value (0.98) differs from the
# `betas` list under ds_cfg (0.95) -- confirm which one the run actually uses.
adam_betas: '(0.9, 0.98)'
total_dataset_len: 54071
max_grad_norm: 1.0

# Schedule length and warm-up.
num_train_epochs: 2
max_steps: 0
warmup_proportion: 0
warmup_steps: 200

# Unset optional features.
optimizer: null
use_nvlamb: null
bit_training: null

# Logging, checkpointing and evaluation cadence.
logging_steps: 5
save_ds_state: false
save_steps: 200
save_best: false
eval_steps: 200
ddp_eval: true

# Device and reproducibility.
no_cuda: false
seed: 42
local_rank: 0

# NOTE(review): fp16 and fp16_bfloat16 are both true while ds_cfg enables bf16;
# presumably fp16_bfloat16 selects bf16 within the mixed-precision path --
# confirm against the trainer.
fp16: true
fp16_opt_level: 'O1'
fp16_bfloat16: true
# Prediction / model-selection settings: metric name, a numeric `measure`, and
# two slots that are unset here.
# NOTE(review): nesting reconstructed because the source had lost its
# indentation; the four keys are assumed to belong to prediction_cfg -- verify.
prediction_cfg:
  metric: acc
  measure: 1
  best_checkpoint: null
  best_result: null
# Spec for the evaluation forward function; no arguments besides `_target_`.
# Indentation restored; the source had `_target_` at top level.
eval_forward_fn:
  _target_: general_util.evaluator.DefaultForwardFn
# Spec for the prediction post-processor; no arguments besides `_target_`.
# Indentation restored; the source had `_target_` at top level.
post_process:
  _target_: post_processors.dpo.ResponseClsPostProcessor
# Spec for the metrics writer; both key-selection arguments are unset here.
# NOTE(review): nesting reconstructed because the source had lost its
# indentation; the two *_index_or_keys entries are assumed to be arguments of
# the writer -- verify against its signature.
summary_helper:
  _target_: general_util.tensorboard_helper.WandbWriter
  batch_index_or_keys: null
  outputs_index_or_keys: null
# NOTE(review): these look like per-process values recorded at run time
# (n_gpu is 1 while world_size is 4) rather than hand-set options -- confirm
# before editing them by hand.
n_gpu: 1
device: 'cuda:0'
# Same value as per_gpu_train_batch_size.
train_batch_size: 4
eval_batch_size: null
world_size: 4
|