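# Training config for a process reward model (PRM) on LogiQA 2.0 ReAct trajectories.
# Fine-tunes a Llama-2-7B-chat DPO-SFT checkpoint as a 2-label sequence classifier
# under DeepSpeed (ZeRO stage 1, bf16). `${...}` references and `_target_` keys are
# Hydra/OmegaConf-style interpolation and object instantiation, resolved by the
# training entry point.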
ds_cfg:
  train_micro_batch_size_per_gpu: ${per_gpu_train_batch_size}
  gradient_accumulation_steps: ${gradient_accumulation_steps}
  scheduler:
    type: WarmupDecayLR
    params:
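      # 1688 = 2 epochs x floor(54071 examples / (4 GPUs x 4 micro-batch x 4 grad-accum)).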
      total_num_steps: 1688
      warmup_max_lr: ${learning_rate}
      warmup_num_steps: 200
      warmup_type: linear
  optimizer:
    type: AdamW
    params:
      lr: ${learning_rate}
      betas:
      - 0.9
      - 0.95
      eps: 1.0e-06
      weight_decay: ${weight_decay}
  bf16:
    enabled: true
  zero_optimization:
    stage: 1
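    # The stage3_* keys below are only consulted under ZeRO stage 3; at stage 1 they are inert.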
    stage3_param_persistence_threshold: 100000
    stage3_max_live_parameters: 100000000
    stage3_prefetch_bucket_size: 100000000
    memory_efficient_linear: false
  steps_per_print: 25
  gradient_clipping: 1.0
  prescale_gradients: false
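# Paths: the SFT checkpoint to fine-tune and the intermediate-state (partial
# trajectory) files sampled from it, from which reward-modeling examples are built.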
sft_model_dir: experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/
fix_hack_data_dir: ${sft_model_dir}/fix_hack_data_dir/
attempt_response_file: ${sft_model_dir}/react-inter-states/process-rm/logiqav2-train.qa.react.v1.0.0shot.sample10.inter_ver2.0.rs0.2.r0.3.sample3.json
inter_states_file_train: ${sft_model_dir}/react-inter-states/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.[1-2]-of-20.json
inter_states_file_dev: ${sft_model_dir}/react-inter-states/logiqav2-train.full.qa.react.v1.0.0shot.sample10.clean_inter_ver2.0.rs0.2.r0.3.0-of-20.json
train_file: ${attempt_response_file}
dev_file: ${attempt_response_file}
test_file: null
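# Model: the SFT checkpoint re-headed as a 2-label sequence classifier, loaded
# in bfloat16 with FlashAttention-2 and gradient checkpointing enabled.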
torch_dtype:
  _target_: general_util.training_utils.return_torch_dtype
  dtype: bfloat16
tokenizer_init:
  _target_: general_util.tokenization_utils.init_tokenizer
  tokenizer_path: experiments/llama2.7b.chat.logiqav2.llama-2-70b-chat.dpo-sft.A6K.w4.v1.0/checkpoint-1600/
  padding_side: left
device_map:
  _target_: models.llama.return_single_device_map
model:
  _target_: models.llama.LlamaModelForSequenceClassification.from_pretrained
  num_labels: 2
  gradient_checkpointing: true
  pad_token_id: 0
  attn_implementation: flash_attention_2
  torch_dtype: ${torch_dtype}
  device_map: ${device_map}
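# Training dataset: renders each example (context / question / options) into the
# ReAct prompt template below and converts per-attempt values into binary labels
# via the `greater_then_one` mapping (identifier kept as registered in data.general).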
read_tensor_train:
  _target_: data.general.Attempt2ValueRewardModelingDataset
  max_value: 3
  original_data_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
  original_reader:
    _target_: data.logiqav2.LogicQAReader
    flat_options: true
  instruction:
    _target_: data.prompts.logiqav2.react.prompts.get_prompt
    prompt_name: react_v2
  reader:
    _target_: data.general.PartialTrajAttemptsReader
    partial_traj_file: ${inter_states_file_train}
  template: 'Context:

    {}


    Question:

    {}


    Options:

    {}


    Thought 1: '
  compose_keys:
  - context
  - question
  - option_list
  value_mapping:
    _target_: data.general.Value2LabelMapping
    name: greater_then_one
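# Eval dataset: same pipeline as read_tensor_train, but reads partial trajectories
# from the held-out shard 0-of-20 (inter_states_file_dev).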
read_tensor:
  _target_: data.general.Attempt2ValueRewardModelingDataset
  max_value: 3
  original_data_file: ../research.data/LogiQA2.0/logiqa/DATA/LOGIQA/train.txt
  original_reader:
    _target_: data.logiqav2.LogicQAReader
    flat_options: true
  instruction:
    _target_: data.prompts.logiqav2.react.prompts.get_prompt
    prompt_name: react_v2
  reader:
    _target_: data.general.PartialTrajAttemptsReader
    partial_traj_file: ${inter_states_file_dev}
  template: 'Context:

    {}


    Question:

    {}


    Options:

    {}


    Thought 1: '
  compose_keys:
  - context
  - question
  - option_list
  value_mapping:
    _target_: data.general.Value2LabelMapping
    name: greater_then_one
dist_load_data_barrier: false
extended_vocab: null
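# Collation: batches are tokenized with the left-padding tokenizer above and
# truncated to max_seq_length tokens.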
collator:
  _target_: data.general.Attempt2ValueCollator
  tokenizer: ${tokenizer_init}
  max_seq_length: 4096
num_workers: 8
prefetch_factor: 2
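# Optimization hyperparameters. Effective global batch: 4 GPUs x 4 per-GPU x 4
# grad-accum = 64 sequences per step. Note that ds_cfg above defines its own AdamW
# (betas 0.9/0.95) and scheduler, which likely supersede adam_betas / adam_epsilon
# below when training runs through DeepSpeed.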
model_name_or_path: ${sft_model_dir}
pretrain: null
resume: null
exp_name: llama2.7b.chat.logiqav2.70b-distil.prm.fix_hack.H100.w4.v2.0.s${seed}
exp_notes: null
output_dir: experiments/${exp_name}
do_train: true
evaluate_during_training: true
do_eval: false
eval_sub_path: checkpoint-*
per_gpu_train_batch_size: 4
per_gpu_eval_batch_size: 12
learning_rate: 1.0e-06
gradient_accumulation_steps: 4
weight_decay: 0.01
adam_epsilon: 1.0e-06
adam_betas: (0.9, 0.98)
total_dataset_len: 54071
max_grad_norm: 1.0
num_train_epochs: 2
max_steps: 0
warmup_proportion: 0
warmup_steps: 200
optimizer: null
use_nvlamb: null
bit_training: null
logging_steps: 5
save_ds_state: false
save_steps: 200
save_best: false
eval_steps: 200
ddp_eval: true
no_cuda: false
seed: 42
local_rank: 0
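# fp16: true together with fp16_bfloat16: true presumably selects bf16 mixed
# precision, consistent with ds_cfg.bf16.enabled above.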
fp16: true
fp16_opt_level: O1
fp16_bfloat16: true
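# Evaluation: checkpoints are selected by accuracy (measure: 1 presumably meaning
# higher-is-better); predictions go through ResponseClsPostProcessor and metrics
# are logged via the W&B writer.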
prediction_cfg:
  metric: acc
  measure: 1
  best_checkpoint: null
  best_result: null
eval_forward_fn:
  _target_: general_util.evaluator.DefaultForwardFn
post_process:
  _target_: post_processors.dpo.ResponseClsPostProcessor
summary_helper:
  _target_: general_util.tensorboard_helper.WandbWriter
  batch_index_or_keys: null
  outputs_index_or_keys: null
n_gpu: 1
device: cuda:0
train_batch_size: 4
eval_batch_size: null
world_size: 4