Rakhman16 commited on
Commit
98239b4
·
verified ·
1 Parent(s): 0e2b3c4

Training in progress, step 12000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5bdcafd7e54ccfb54edd3811725f1984a354498d7153f053c3cdf7217ec9db4
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12d89b6cfeb0fa2a639fb1d022803e910e636a1653929f3379ba8ecc07936d2d
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78e8caa6e190a79f2c47539fc7bdacd9073500128a21f89148735b0653b16ff7
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16e8ca890c29387dcb94fe4fee166151a4647fb651f909dbd97850a259675cfe
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3d22d79498e9a42f44dadc83e0bfe26c6297fe6f1a1339b834940e632f50f9f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eee71f4c759651379c503d3028bec932d355f171dd7453ec6f5c469e966f747
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c19fddbbe59fe77d9c9931e2dfec577f342f095ed5843c735b486fb4141326d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b63e11db1a8e7c7a242100e7b3a9500ec8f1ad290a19c61a227cd5ed6d79dcc2
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.1035689190030098,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-11000",
4
- "epoch": 2.019847194168789,
5
  "eval_steps": 100,
6
- "global_step": 11500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2537,6 +2537,116 @@
2537
  "eval_samples_per_second": 25.457,
2538
  "eval_steps_per_second": 3.185,
2539
  "step": 11500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2540
  }
2541
  ],
2542
  "logging_steps": 50,
@@ -2556,7 +2666,7 @@
2556
  "attributes": {}
2557
  }
2558
  },
2559
- "total_flos": 5.601986322628608e+16,
2560
  "train_batch_size": 8,
2561
  "trial_name": null,
2562
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.10353059321641922,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-12000",
4
+ "epoch": 2.107666637393519,
5
  "eval_steps": 100,
6
+ "global_step": 12000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2537
  "eval_samples_per_second": 25.457,
2538
  "eval_steps_per_second": 3.185,
2539
  "step": 11500
2540
+ },
2541
+ {
2542
+ "epoch": 2.028629138491262,
2543
+ "grad_norm": 8534.212890625,
2544
+ "learning_rate": 1.4783945195854559e-05,
2545
+ "loss": 0.0969,
2546
+ "step": 11550
2547
+ },
2548
+ {
2549
+ "epoch": 2.037411082813735,
2550
+ "grad_norm": 10853.4990234375,
2551
+ "learning_rate": 1.4718074828737047e-05,
2552
+ "loss": 0.1016,
2553
+ "step": 11600
2554
+ },
2555
+ {
2556
+ "epoch": 2.037411082813735,
2557
+ "eval_loss": 0.10368319600820541,
2558
+ "eval_runtime": 175.2089,
2559
+ "eval_samples_per_second": 25.455,
2560
+ "eval_steps_per_second": 3.185,
2561
+ "step": 11600
2562
+ },
2563
+ {
2564
+ "epoch": 2.046193027136208,
2565
+ "grad_norm": 10575.98828125,
2566
+ "learning_rate": 1.4652204461619534e-05,
2567
+ "loss": 0.0993,
2568
+ "step": 11650
2569
+ },
2570
+ {
2571
+ "epoch": 2.054974971458681,
2572
+ "grad_norm": 17123.625,
2573
+ "learning_rate": 1.458633409450202e-05,
2574
+ "loss": 0.1011,
2575
+ "step": 11700
2576
+ },
2577
+ {
2578
+ "epoch": 2.054974971458681,
2579
+ "eval_loss": 0.10356248915195465,
2580
+ "eval_runtime": 175.2585,
2581
+ "eval_samples_per_second": 25.448,
2582
+ "eval_steps_per_second": 3.184,
2583
+ "step": 11700
2584
+ },
2585
+ {
2586
+ "epoch": 2.0637569157811537,
2587
+ "grad_norm": 13095.1728515625,
2588
+ "learning_rate": 1.4520463727384507e-05,
2589
+ "loss": 0.0956,
2590
+ "step": 11750
2591
+ },
2592
+ {
2593
+ "epoch": 2.0725388601036268,
2594
+ "grad_norm": 11280.3291015625,
2595
+ "learning_rate": 1.4454593360266995e-05,
2596
+ "loss": 0.0969,
2597
+ "step": 11800
2598
+ },
2599
+ {
2600
+ "epoch": 2.0725388601036268,
2601
+ "eval_loss": 0.10367120802402496,
2602
+ "eval_runtime": 175.3325,
2603
+ "eval_samples_per_second": 25.437,
2604
+ "eval_steps_per_second": 3.183,
2605
+ "step": 11800
2606
+ },
2607
+ {
2608
+ "epoch": 2.0813208044261,
2609
+ "grad_norm": 8861.095703125,
2610
+ "learning_rate": 1.4388722993149482e-05,
2611
+ "loss": 0.0971,
2612
+ "step": 11850
2613
+ },
2614
+ {
2615
+ "epoch": 2.090102748748573,
2616
+ "grad_norm": 15480.5634765625,
2617
+ "learning_rate": 1.432285262603197e-05,
2618
+ "loss": 0.0977,
2619
+ "step": 11900
2620
+ },
2621
+ {
2622
+ "epoch": 2.090102748748573,
2623
+ "eval_loss": 0.1037474200129509,
2624
+ "eval_runtime": 175.4966,
2625
+ "eval_samples_per_second": 25.414,
2626
+ "eval_steps_per_second": 3.18,
2627
+ "step": 11900
2628
+ },
2629
+ {
2630
+ "epoch": 2.098884693071046,
2631
+ "grad_norm": 10802.611328125,
2632
+ "learning_rate": 1.4256982258914457e-05,
2633
+ "loss": 0.0979,
2634
+ "step": 11950
2635
+ },
2636
+ {
2637
+ "epoch": 2.107666637393519,
2638
+ "grad_norm": 7810.14111328125,
2639
+ "learning_rate": 1.4191111891796943e-05,
2640
+ "loss": 0.0963,
2641
+ "step": 12000
2642
+ },
2643
+ {
2644
+ "epoch": 2.107666637393519,
2645
+ "eval_loss": 0.10353059321641922,
2646
+ "eval_runtime": 175.3863,
2647
+ "eval_samples_per_second": 25.43,
2648
+ "eval_steps_per_second": 3.182,
2649
+ "step": 12000
2650
  }
2651
  ],
2652
  "logging_steps": 50,
 
2666
  "attributes": {}
2667
  }
2668
  },
2669
+ "total_flos": 5.845569478852608e+16,
2670
  "train_batch_size": 8,
2671
  "trial_name": null,
2672
  "trial_params": null