AmberYifan committed on
Commit
49d3b1d
·
verified ·
1 Parent(s): c6e400b

Training in progress, epoch 3, checkpoint

Browse files
last-checkpoint/global_step189/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13f2d28b244603c8211889c9afcd695067df8233359e5bf233ac00738cc0dfe1
3
+ size 18483417132
last-checkpoint/global_step189/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e8a803f2b04a8276b1eb66810ddbc61190564b23303a92512f9ed7afbb4fcb7
3
+ size 18483417132
last-checkpoint/global_step189/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2adf3fe11d4864749006592786bac2389b529c2fb573de7fc3dde322436ed508
3
+ size 18483417132
last-checkpoint/global_step189/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34a51614e95c914d61cf1aa8c0b9f4aae4a0a9d65868f482622ed0db85587ed7
3
+ size 18483417132
last-checkpoint/global_step189/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d9e3111d3d26ecb49527b8be396833f19c85a4a9035479056db556ad4a57810
3
+ size 239998
last-checkpoint/global_step189/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:057ecdfa0fe058bb849b670b85f4e7901d31dfc556c46fecc4be81caba38acf2
3
+ size 239934
last-checkpoint/global_step189/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:922227d611126b51889f9e50b1f09afa5d2366bd6caaf2fdd49d6c48fd8172eb
3
+ size 239934
last-checkpoint/global_step189/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b46671cd1d4110dbb9dd43fddab33325dddfa3bef14669e729c74cc1d4c3674e
3
+ size 239934
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step63
 
1
+ global_step189
last-checkpoint/model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a3faa501ab33a80380f2180b1f8fb7f16d78a7f89b516c1fb239de055d43bed
3
  size 4903351912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea1b6d7fce5305bfacd4bb0f50d40e3bbe0238a3bb00f40af276051bf7fc5df1
3
  size 4903351912
last-checkpoint/model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d3fbbc0ff5deb82216230034c2e2eedcb7e66b8b01c863e39977722cca24e6bc
3
  size 4947570872
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51ea4315a74315b512266770598430ee290cc9c17281a3c02e07e4d7f3345713
3
  size 4947570872
last-checkpoint/model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89355447e0788040c35f2751b524c4c98dd37271ed58c929b2c3c8ca9fa681fe
3
  size 4962221464
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d4602515e8507cbdaa8a21892d38fe5c4bb7d7d205e4484fee3bc16458d146f
3
  size 4962221464
last-checkpoint/model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65397289baba5c364fecf387a6c902463375bf07a37fda7b7ea59587c5bfbd1c
3
  size 3670322200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03baccf59188e4af4695f188e99336c9705d69575ac58cf26555a9a706c08e3a
3
  size 3670322200
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f26ed720ddc0139bd34500d6a56d90b1325ef50284b8f9e9bcefa8c01d87e400
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:084292ad91359496cd16eff7072d86abd759541271e83fa50d494de3dbdf77ee
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bcae6573442578b752be3e988d4ccb38056b45c31b5c02eb579ec6cebcfa62e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7b183324e8227a51a9556d86b2ad893a8c4c52205ed4a737356c6611dac7353
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:398ff45072fee2975e88b9e078f915103d75d94bc08753303cf855b915973623
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac69e994090f4818cb1fa6f6cefa363178552c3c731c6507ff195bcb07fd5bef
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f4b210bc832d4c648e3bee8c21dca26e5a8b365d6ec90c638062005a052e57b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f68e61b63402f8afb1f69c960f7944965655dac11e3ccf29919c282f23931f86
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e2d72f8efaf9223a5f3a90e86eb233b5729e521882d016722304afbd046b461c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:129a41200f0c6d6e44aca4acb3d831836fddce423bc461718262ff68762bbd3e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 63,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -128,6 +128,218 @@
128
  "eval_samples_per_second": 10.597,
129
  "eval_steps_per_second": 0.371,
130
  "step": 63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  }
132
  ],
133
  "logging_steps": 10,
@@ -142,7 +354,7 @@
142
  "should_evaluate": false,
143
  "should_log": false,
144
  "should_save": true,
145
- "should_training_stop": false
146
  },
147
  "attributes": {}
148
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 189,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
128
  "eval_samples_per_second": 10.597,
129
  "eval_steps_per_second": 0.371,
130
  "step": 63
131
+ },
132
+ {
133
+ "epoch": 1.1111111111111112,
134
+ "grad_norm": 20.770261653452945,
135
+ "learning_rate": 3.5e-07,
136
+ "logits/chosen": -1.6171875,
137
+ "logits/rejected": -0.87109375,
138
+ "logps/chosen": -62.25,
139
+ "logps/rejected": -85.0,
140
+ "loss": 0.3381,
141
+ "rewards/accuracies": 0.8999999761581421,
142
+ "rewards/chosen": 0.578125,
143
+ "rewards/margins": 1.953125,
144
+ "rewards/rejected": -1.375,
145
+ "step": 70
146
+ },
147
+ {
148
+ "epoch": 1.2698412698412698,
149
+ "grad_norm": 18.43753108370381,
150
+ "learning_rate": 3.205882352941177e-07,
151
+ "logits/chosen": -1.34375,
152
+ "logits/rejected": -0.84375,
153
+ "logps/chosen": -49.5,
154
+ "logps/rejected": -99.0,
155
+ "loss": 0.3254,
156
+ "rewards/accuracies": 0.925000011920929,
157
+ "rewards/chosen": 0.3671875,
158
+ "rewards/margins": 2.390625,
159
+ "rewards/rejected": -2.015625,
160
+ "step": 80
161
+ },
162
+ {
163
+ "epoch": 1.4285714285714286,
164
+ "grad_norm": 16.839062370433073,
165
+ "learning_rate": 2.911764705882353e-07,
166
+ "logits/chosen": -1.5078125,
167
+ "logits/rejected": -0.78515625,
168
+ "logps/chosen": -57.5,
169
+ "logps/rejected": -72.5,
170
+ "loss": 0.2715,
171
+ "rewards/accuracies": 0.8999999761581421,
172
+ "rewards/chosen": 0.84765625,
173
+ "rewards/margins": 2.03125,
174
+ "rewards/rejected": -1.1875,
175
+ "step": 90
176
+ },
177
+ {
178
+ "epoch": 1.5873015873015874,
179
+ "grad_norm": 16.580044861649466,
180
+ "learning_rate": 2.6176470588235295e-07,
181
+ "logits/chosen": -2.09375,
182
+ "logits/rejected": -1.3359375,
183
+ "logps/chosen": -51.0,
184
+ "logps/rejected": -99.5,
185
+ "loss": 0.2519,
186
+ "rewards/accuracies": 0.8374999761581421,
187
+ "rewards/chosen": 0.345703125,
188
+ "rewards/margins": 1.9375,
189
+ "rewards/rejected": -1.59375,
190
+ "step": 100
191
+ },
192
+ {
193
+ "epoch": 1.746031746031746,
194
+ "grad_norm": 16.88214421094986,
195
+ "learning_rate": 2.323529411764706e-07,
196
+ "logits/chosen": -2.3125,
197
+ "logits/rejected": -1.171875,
198
+ "logps/chosen": -55.75,
199
+ "logps/rejected": -90.5,
200
+ "loss": 0.2389,
201
+ "rewards/accuracies": 0.925000011920929,
202
+ "rewards/chosen": 0.197265625,
203
+ "rewards/margins": 1.9296875,
204
+ "rewards/rejected": -1.734375,
205
+ "step": 110
206
+ },
207
+ {
208
+ "epoch": 1.9047619047619047,
209
+ "grad_norm": 12.829485946331816,
210
+ "learning_rate": 2.0294117647058823e-07,
211
+ "logits/chosen": -1.8828125,
212
+ "logits/rejected": -0.9453125,
213
+ "logps/chosen": -50.5,
214
+ "logps/rejected": -102.5,
215
+ "loss": 0.2831,
216
+ "rewards/accuracies": 0.9125000238418579,
217
+ "rewards/chosen": 0.10888671875,
218
+ "rewards/margins": 2.578125,
219
+ "rewards/rejected": -2.46875,
220
+ "step": 120
221
+ },
222
+ {
223
+ "epoch": 2.0,
224
+ "eval_logits/chosen": -1.84375,
225
+ "eval_logits/rejected": -1.171875,
226
+ "eval_logps/chosen": -66.0,
227
+ "eval_logps/rejected": -97.5,
228
+ "eval_loss": 0.6284375190734863,
229
+ "eval_rewards/accuracies": 0.6071428656578064,
230
+ "eval_rewards/chosen": -0.453125,
231
+ "eval_rewards/margins": 0.75,
232
+ "eval_rewards/rejected": -1.203125,
233
+ "eval_runtime": 23.6296,
234
+ "eval_samples_per_second": 8.464,
235
+ "eval_steps_per_second": 0.296,
236
+ "step": 126
237
+ },
238
+ {
239
+ "epoch": 2.0634920634920633,
240
+ "grad_norm": 7.418629403738186,
241
+ "learning_rate": 1.7352941176470587e-07,
242
+ "logits/chosen": -2.109375,
243
+ "logits/rejected": -0.73046875,
244
+ "logps/chosen": -61.25,
245
+ "logps/rejected": -106.0,
246
+ "loss": 0.1749,
247
+ "rewards/accuracies": 0.9624999761581421,
248
+ "rewards/chosen": 0.4296875,
249
+ "rewards/margins": 2.625,
250
+ "rewards/rejected": -2.203125,
251
+ "step": 130
252
+ },
253
+ {
254
+ "epoch": 2.2222222222222223,
255
+ "grad_norm": 5.917946365968759,
256
+ "learning_rate": 1.441176470588235e-07,
257
+ "logits/chosen": -2.390625,
258
+ "logits/rejected": -1.2890625,
259
+ "logps/chosen": -72.0,
260
+ "logps/rejected": -100.0,
261
+ "loss": 0.1132,
262
+ "rewards/accuracies": 0.9750000238418579,
263
+ "rewards/chosen": 0.6015625,
264
+ "rewards/margins": 3.65625,
265
+ "rewards/rejected": -3.0625,
266
+ "step": 140
267
+ },
268
+ {
269
+ "epoch": 2.380952380952381,
270
+ "grad_norm": 7.62179839987552,
271
+ "learning_rate": 1.1470588235294116e-07,
272
+ "logits/chosen": -2.125,
273
+ "logits/rejected": -1.296875,
274
+ "logps/chosen": -54.0,
275
+ "logps/rejected": -92.5,
276
+ "loss": 0.125,
277
+ "rewards/accuracies": 0.987500011920929,
278
+ "rewards/chosen": 0.66796875,
279
+ "rewards/margins": 3.171875,
280
+ "rewards/rejected": -2.5,
281
+ "step": 150
282
+ },
283
+ {
284
+ "epoch": 2.5396825396825395,
285
+ "grad_norm": 7.689817277750616,
286
+ "learning_rate": 8.529411764705883e-08,
287
+ "logits/chosen": -1.8671875,
288
+ "logits/rejected": -2.3125,
289
+ "logps/chosen": -50.25,
290
+ "logps/rejected": -92.5,
291
+ "loss": 0.118,
292
+ "rewards/accuracies": 0.949999988079071,
293
+ "rewards/chosen": 0.3203125,
294
+ "rewards/margins": 3.109375,
295
+ "rewards/rejected": -2.78125,
296
+ "step": 160
297
+ },
298
+ {
299
+ "epoch": 2.6984126984126986,
300
+ "grad_norm": 5.583079792162674,
301
+ "learning_rate": 5.5882352941176474e-08,
302
+ "logits/chosen": -2.21875,
303
+ "logits/rejected": -1.1328125,
304
+ "logps/chosen": -66.0,
305
+ "logps/rejected": -114.5,
306
+ "loss": 0.145,
307
+ "rewards/accuracies": 0.9750000238418579,
308
+ "rewards/chosen": 0.388671875,
309
+ "rewards/margins": 3.671875,
310
+ "rewards/rejected": -3.28125,
311
+ "step": 170
312
+ },
313
+ {
314
+ "epoch": 2.857142857142857,
315
+ "grad_norm": 10.79399873370834,
316
+ "learning_rate": 2.6470588235294116e-08,
317
+ "logits/chosen": -1.6640625,
318
+ "logits/rejected": -1.5234375,
319
+ "logps/chosen": -59.0,
320
+ "logps/rejected": -87.5,
321
+ "loss": 0.1329,
322
+ "rewards/accuracies": 0.949999988079071,
323
+ "rewards/chosen": 0.310546875,
324
+ "rewards/margins": 2.75,
325
+ "rewards/rejected": -2.4375,
326
+ "step": 180
327
+ },
328
+ {
329
+ "epoch": 3.0,
330
+ "eval_logits/chosen": -2.21875,
331
+ "eval_logits/rejected": -1.4921875,
332
+ "eval_logps/chosen": -69.5,
333
+ "eval_logps/rejected": -102.5,
334
+ "eval_loss": 0.6340625286102295,
335
+ "eval_rewards/accuracies": 0.6607142686843872,
336
+ "eval_rewards/chosen": -0.76953125,
337
+ "eval_rewards/margins": 0.921875,
338
+ "eval_rewards/rejected": -1.6953125,
339
+ "eval_runtime": 21.8558,
340
+ "eval_samples_per_second": 9.151,
341
+ "eval_steps_per_second": 0.32,
342
+ "step": 189
343
  }
344
  ],
345
  "logging_steps": 10,
 
354
  "should_evaluate": false,
355
  "should_log": false,
356
  "should_save": true,
357
+ "should_training_stop": true
358
  },
359
  "attributes": {}
360
  }