AmberYifan commited on
Commit
f38aa91
·
verified ·
1 Parent(s): 4c1b4aa

Training in progress, epoch 2, checkpoint

Browse files
last-checkpoint/global_step313/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc5168e094c353f6c1e715b7713522246c8956ba1042e011d84694b104fd66f0
3
+ size 13476835648
last-checkpoint/global_step313/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb48bfc1466dfe16b5f8b187ab0459aa51cd1f2b180b6e2379a7c66dc2059ae8
3
+ size 13476835648
last-checkpoint/global_step313/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae156148c62d4cc756567606673c8af0e8bcd08b9399e3a4a13678c9a3ed669f
3
+ size 13476835648
last-checkpoint/global_step313/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0db14b7fa9764709bfac94d039769e9a774f4aedd210afda859734cf76886bf2
3
+ size 13476835648
last-checkpoint/global_step313/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87b448f691cfa4a46495dc05e49e01cff1797eee23ebb09e55632c64c52dc224
3
+ size 150693
last-checkpoint/global_step313/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:211e1ef93c6886703f99a126064c19878398b27bfd7d6fe55c0993b88fd20b06
3
+ size 150693
last-checkpoint/global_step313/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5a18f8c5c0e35b7a56b01112f6b3a949d7c33eb71375ee3d414c563c689d607
3
+ size 150693
last-checkpoint/global_step313/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42b9a66ad55de6e97e1ca4bcd8022a87634ef75bde4ced7652fa266ac694f25f
3
+ size 150693
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step156
 
1
+ global_step313
last-checkpoint/model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea2003c7f2b81624716ea1e9cc5797b75a32c0210b692218e05bc308cbf38677
3
  size 4938985352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c339c890ec08f3e5e58f5ad06ebae0dbd6ddb23ad121d561e63e9bac188bfc9b
3
  size 4938985352
last-checkpoint/model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36c4dbb8786a4dd80f301b96f745c06e2ea16eeb0a74d8bfac0e893940b92ea5
3
  size 4947390880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76dbefc709489db092eb6a498172ab21f68b4a5a5dff5455f75484d6faa34133
3
  size 4947390880
last-checkpoint/model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed6718359c5e87e0c28bd21b8861ea7fba4affa8f6c316ffd553489279792a58
3
  size 3590488816
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a533572b108506e6e59af9b867786193b44716117ef6ea8ec975a5d6308541b8
3
  size 3590488816
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4a1dc8b9f546123ce9b44c28828ae71d9d0ecd6f59c6b7b8337c73cb2e24c49
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50ca27603f7c4a4d9a980223e1a102922ac31561acfa7da4f28042e4e8b3528b
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a8b225b0323772b66570a16b5c7e6446dc3c9ee076597906a2c90e3c0a383bf
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c57f658c3d9b6de611da50021823095fc4067036fb9a4b34655cc2e34fbf5c2b
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c036a8d528905f7018266618c8bebe4add10c2778ef432fd0116369fa622d815
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:109da2c363a1e09432fa2ea2b7dceb6f67d1f91975069fc669e71dd536d654d8
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b93d88429d97654e76d10b1ba4c36f29a47e210c65a03efdc136f04af8282797
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bda65e490958bfe3fe01157ed131eb2354f8fc19536842d27c998ecf29d48991
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83d5ef0cb3d7d5c4ee48b323753e4be796504f2feec3727a9d0af4a98df5f901
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afa7143d1f3515855bc2dc683c033423673c7ec610b3bb70a98e9e265d312b47
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.9968051118210862,
5
  "eval_steps": 500,
6
- "global_step": 156,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -263,6 +263,262 @@
263
  "eval_samples_per_second": 17.219,
264
  "eval_steps_per_second": 0.603,
265
  "step": 156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  }
267
  ],
268
  "logging_steps": 10,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 313,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
263
  "eval_samples_per_second": 17.219,
264
  "eval_steps_per_second": 0.603,
265
  "step": 156
266
+ },
267
+ {
268
+ "epoch": 1.0223642172523961,
269
+ "grad_norm": 0.05686403061946404,
270
+ "learning_rate": 3.6579572446555817e-07,
271
+ "logits/chosen": -1.5859375,
272
+ "logits/rejected": -1.453125,
273
+ "logps/chosen": -174.0,
274
+ "logps/rejected": -249.0,
275
+ "loss": 0.0005,
276
+ "rewards/accuracies": 1.0,
277
+ "rewards/chosen": -0.95703125,
278
+ "rewards/margins": 10.875,
279
+ "rewards/rejected": -11.8125,
280
+ "step": 160
281
+ },
282
+ {
283
+ "epoch": 1.0862619808306708,
284
+ "grad_norm": 0.025647512662174864,
285
+ "learning_rate": 3.5391923990498813e-07,
286
+ "logits/chosen": -1.53125,
287
+ "logits/rejected": -1.4765625,
288
+ "logps/chosen": -151.0,
289
+ "logps/rejected": -253.0,
290
+ "loss": 0.0002,
291
+ "rewards/accuracies": 1.0,
292
+ "rewards/chosen": -0.890625,
293
+ "rewards/margins": 11.4375,
294
+ "rewards/rejected": -12.3125,
295
+ "step": 170
296
+ },
297
+ {
298
+ "epoch": 1.1501597444089458,
299
+ "grad_norm": 0.10600901838990953,
300
+ "learning_rate": 3.42042755344418e-07,
301
+ "logits/chosen": -1.5546875,
302
+ "logits/rejected": -1.46875,
303
+ "logps/chosen": -160.0,
304
+ "logps/rejected": -249.0,
305
+ "loss": 0.0003,
306
+ "rewards/accuracies": 1.0,
307
+ "rewards/chosen": -0.94140625,
308
+ "rewards/margins": 11.25,
309
+ "rewards/rejected": -12.25,
310
+ "step": 180
311
+ },
312
+ {
313
+ "epoch": 1.2140575079872205,
314
+ "grad_norm": 0.13119830498432972,
315
+ "learning_rate": 3.3016627078384796e-07,
316
+ "logits/chosen": -1.53125,
317
+ "logits/rejected": -1.484375,
318
+ "logps/chosen": -146.0,
319
+ "logps/rejected": -251.0,
320
+ "loss": 0.0002,
321
+ "rewards/accuracies": 1.0,
322
+ "rewards/chosen": -0.88671875,
323
+ "rewards/margins": 11.4375,
324
+ "rewards/rejected": -12.3125,
325
+ "step": 190
326
+ },
327
+ {
328
+ "epoch": 1.2779552715654952,
329
+ "grad_norm": 0.030239526412913494,
330
+ "learning_rate": 3.182897862232779e-07,
331
+ "logits/chosen": -1.5390625,
332
+ "logits/rejected": -1.4609375,
333
+ "logps/chosen": -134.0,
334
+ "logps/rejected": -252.0,
335
+ "loss": 0.0003,
336
+ "rewards/accuracies": 1.0,
337
+ "rewards/chosen": -0.9453125,
338
+ "rewards/margins": 11.6875,
339
+ "rewards/rejected": -12.625,
340
+ "step": 200
341
+ },
342
+ {
343
+ "epoch": 1.34185303514377,
344
+ "grad_norm": 0.0080485091857848,
345
+ "learning_rate": 3.0641330166270784e-07,
346
+ "logits/chosen": -1.5859375,
347
+ "logits/rejected": -1.5078125,
348
+ "logps/chosen": -154.0,
349
+ "logps/rejected": -260.0,
350
+ "loss": 0.0002,
351
+ "rewards/accuracies": 1.0,
352
+ "rewards/chosen": -0.97265625,
353
+ "rewards/margins": 11.8125,
354
+ "rewards/rejected": -12.8125,
355
+ "step": 210
356
+ },
357
+ {
358
+ "epoch": 1.4057507987220448,
359
+ "grad_norm": 0.01503760669759659,
360
+ "learning_rate": 2.9453681710213776e-07,
361
+ "logits/chosen": -1.59375,
362
+ "logits/rejected": -1.5234375,
363
+ "logps/chosen": -154.0,
364
+ "logps/rejected": -256.0,
365
+ "loss": 0.0009,
366
+ "rewards/accuracies": 1.0,
367
+ "rewards/chosen": -0.890625,
368
+ "rewards/margins": 11.8125,
369
+ "rewards/rejected": -12.6875,
370
+ "step": 220
371
+ },
372
+ {
373
+ "epoch": 1.4696485623003195,
374
+ "grad_norm": 0.01745595116822664,
375
+ "learning_rate": 2.8266033254156767e-07,
376
+ "logits/chosen": -1.5703125,
377
+ "logits/rejected": -1.5078125,
378
+ "logps/chosen": -152.0,
379
+ "logps/rejected": -256.0,
380
+ "loss": 0.0004,
381
+ "rewards/accuracies": 1.0,
382
+ "rewards/chosen": -0.73828125,
383
+ "rewards/margins": 12.0,
384
+ "rewards/rejected": -12.75,
385
+ "step": 230
386
+ },
387
+ {
388
+ "epoch": 1.5335463258785942,
389
+ "grad_norm": 0.010639490905132864,
390
+ "learning_rate": 2.7078384798099764e-07,
391
+ "logits/chosen": -1.6015625,
392
+ "logits/rejected": -1.5,
393
+ "logps/chosen": -152.0,
394
+ "logps/rejected": -262.0,
395
+ "loss": 0.0001,
396
+ "rewards/accuracies": 1.0,
397
+ "rewards/chosen": -0.8359375,
398
+ "rewards/margins": 12.125,
399
+ "rewards/rejected": -13.0,
400
+ "step": 240
401
+ },
402
+ {
403
+ "epoch": 1.5974440894568689,
404
+ "grad_norm": 0.021325021450464145,
405
+ "learning_rate": 2.589073634204275e-07,
406
+ "logits/chosen": -1.53125,
407
+ "logits/rejected": -1.5,
408
+ "logps/chosen": -153.0,
409
+ "logps/rejected": -256.0,
410
+ "loss": 0.0005,
411
+ "rewards/accuracies": 1.0,
412
+ "rewards/chosen": -0.80859375,
413
+ "rewards/margins": 11.6875,
414
+ "rewards/rejected": -12.5,
415
+ "step": 250
416
+ },
417
+ {
418
+ "epoch": 1.6613418530351438,
419
+ "grad_norm": 0.03402727752531963,
420
+ "learning_rate": 2.4703087885985747e-07,
421
+ "logits/chosen": -1.546875,
422
+ "logits/rejected": -1.5234375,
423
+ "logps/chosen": -143.0,
424
+ "logps/rejected": -255.0,
425
+ "loss": 0.0002,
426
+ "rewards/accuracies": 1.0,
427
+ "rewards/chosen": -0.7265625,
428
+ "rewards/margins": 11.6875,
429
+ "rewards/rejected": -12.4375,
430
+ "step": 260
431
+ },
432
+ {
433
+ "epoch": 1.7252396166134185,
434
+ "grad_norm": 0.01735332750787422,
435
+ "learning_rate": 2.351543942992874e-07,
436
+ "logits/chosen": -1.6171875,
437
+ "logits/rejected": -1.5078125,
438
+ "logps/chosen": -165.0,
439
+ "logps/rejected": -260.0,
440
+ "loss": 0.0001,
441
+ "rewards/accuracies": 1.0,
442
+ "rewards/chosen": -0.80859375,
443
+ "rewards/margins": 12.1875,
444
+ "rewards/rejected": -13.0,
445
+ "step": 270
446
+ },
447
+ {
448
+ "epoch": 1.7891373801916934,
449
+ "grad_norm": 0.02817904426646656,
450
+ "learning_rate": 2.2327790973871732e-07,
451
+ "logits/chosen": -1.5703125,
452
+ "logits/rejected": -1.5078125,
453
+ "logps/chosen": -158.0,
454
+ "logps/rejected": -266.0,
455
+ "loss": 0.0001,
456
+ "rewards/accuracies": 1.0,
457
+ "rewards/chosen": -0.7734375,
458
+ "rewards/margins": 12.375,
459
+ "rewards/rejected": -13.125,
460
+ "step": 280
461
+ },
462
+ {
463
+ "epoch": 1.8530351437699681,
464
+ "grad_norm": 0.026646622883463183,
465
+ "learning_rate": 2.1140142517814726e-07,
466
+ "logits/chosen": -1.5859375,
467
+ "logits/rejected": -1.46875,
468
+ "logps/chosen": -154.0,
469
+ "logps/rejected": -258.0,
470
+ "loss": 0.0007,
471
+ "rewards/accuracies": 1.0,
472
+ "rewards/chosen": -0.90234375,
473
+ "rewards/margins": 11.9375,
474
+ "rewards/rejected": -12.875,
475
+ "step": 290
476
+ },
477
+ {
478
+ "epoch": 1.9169329073482428,
479
+ "grad_norm": 0.005464018856443713,
480
+ "learning_rate": 1.9952494061757718e-07,
481
+ "logits/chosen": -1.5546875,
482
+ "logits/rejected": -1.4765625,
483
+ "logps/chosen": -155.0,
484
+ "logps/rejected": -268.0,
485
+ "loss": 0.0001,
486
+ "rewards/accuracies": 1.0,
487
+ "rewards/chosen": -1.296875,
488
+ "rewards/margins": 12.3125,
489
+ "rewards/rejected": -13.625,
490
+ "step": 300
491
+ },
492
+ {
493
+ "epoch": 1.9808306709265175,
494
+ "grad_norm": 0.01607250092110866,
495
+ "learning_rate": 1.876484560570071e-07,
496
+ "logits/chosen": -1.5546875,
497
+ "logits/rejected": -1.4765625,
498
+ "logps/chosen": -152.0,
499
+ "logps/rejected": -268.0,
500
+ "loss": 0.0001,
501
+ "rewards/accuracies": 1.0,
502
+ "rewards/chosen": -1.34375,
503
+ "rewards/margins": 12.5625,
504
+ "rewards/rejected": -13.875,
505
+ "step": 310
506
+ },
507
+ {
508
+ "epoch": 2.0,
509
+ "eval_logits/chosen": -1.5078125,
510
+ "eval_logits/rejected": -1.515625,
511
+ "eval_logps/chosen": -151.0,
512
+ "eval_logps/rejected": -266.0,
513
+ "eval_loss": 1.9190907551092096e-05,
514
+ "eval_rewards/accuracies": 1.0,
515
+ "eval_rewards/chosen": -1.1640625,
516
+ "eval_rewards/margins": 12.3125,
517
+ "eval_rewards/rejected": -13.4375,
518
+ "eval_runtime": 11.5575,
519
+ "eval_samples_per_second": 17.305,
520
+ "eval_steps_per_second": 0.606,
521
+ "step": 313
522
  }
523
  ],
524
  "logging_steps": 10,