ncbateman committed on
Commit
1b7bf54
·
verified ·
1 Parent(s): f2f68b4

Training in progress, step 811, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7158fc026fcb3c75de92fe98a9e7575457f3d0ddfdd21de716103cd610f69de9
3
  size 792912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55e901950eda5c3c11519f5869e78400ddd745c2f3fe32041dc5dd5a35a5e996
3
  size 792912
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e19210c69c04c9e2774683c62f913e9f0f21be0a7cfcac2e1acf7c710d1c386
3
  size 807226
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73ed21f6575ce7ac1358e5cf5c7c69d83efbe73affe310120a69fabf245a1f71
3
  size 807226
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22ad685c1237efec2f2e8de413bc38dd1475089fe9b28d8d43fe1c45017827ac
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95854d5cc00d71cecca852780bc1bcfd6e88e4fb1b955160cbf9a3145c24d1f4
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53dc596014d0f9b21145357c41c37d57500c8efda8a0f2014bb2d988a043e59f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c29de9e8fa8a45e29d4ef4934d4078cdeb0cb620b4247b9553ca2b1a8cbd29d
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:223b0451f4d9b8dd81e35f5a28bc073e05755799383bf2e8538e323aed665d3b
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34653e4e06f0f7c40ebe51d437e12f779eafc5b99b228a15e55333dd8cf2018e
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18d17262a3d5928a646b0fefef89331d9cdec36c92fdf757662a6e2bf2165b6d
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b961f6d9076ef8d42e30189d58f3020d35ceddece8ef8239ee39ccc91e6823f5
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f17ccafd97486ba1c378a3ba81af6282c7ec0337562d5f30dc98873ace309528
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70b60170e4ee207b411221d3212e057a7eb914ba6755abafc4c8bac2ff50fb1f
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.987045033929673,
5
  "eval_steps": 203,
6
- "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -5639,6 +5639,83 @@
5639
  "learning_rate": 5.154436320919942e-08,
5640
  "loss": 4.0969,
5641
  "step": 800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5642
  }
5643
  ],
5644
  "logging_steps": 1,
@@ -5653,12 +5730,12 @@
5653
  "should_evaluate": false,
5654
  "should_log": false,
5655
  "should_save": true,
5656
- "should_training_stop": false
5657
  },
5658
  "attributes": {}
5659
  }
5660
  },
5661
- "total_flos": 5313340347777024.0,
5662
  "train_batch_size": 2,
5663
  "trial_name": null,
5664
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.000616903146206,
5
  "eval_steps": 203,
6
+ "global_step": 811,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
5639
  "learning_rate": 5.154436320919942e-08,
5640
  "loss": 4.0969,
5641
  "step": 800
5642
+ },
5643
+ {
5644
+ "epoch": 0.9882788402220851,
5645
+ "grad_norm": 657.191162109375,
5646
+ "learning_rate": 4.25999178087888e-08,
5647
+ "loss": 4.127,
5648
+ "step": 801
5649
+ },
5650
+ {
5651
+ "epoch": 0.9895126465144972,
5652
+ "grad_norm": 618.2482299804688,
5653
+ "learning_rate": 3.4506864562133815e-08,
5654
+ "loss": 3.99,
5655
+ "step": 802
5656
+ },
5657
+ {
5658
+ "epoch": 0.9907464528069093,
5659
+ "grad_norm": 729.18798828125,
5660
+ "learning_rate": 2.7265341393983845e-08,
5661
+ "loss": 4.0454,
5662
+ "step": 803
5663
+ },
5664
+ {
5665
+ "epoch": 0.9919802590993214,
5666
+ "grad_norm": 660.1466064453125,
5667
+ "learning_rate": 2.0875471717013427e-08,
5668
+ "loss": 4.1317,
5669
+ "step": 804
5670
+ },
5671
+ {
5672
+ "epoch": 0.9932140653917335,
5673
+ "grad_norm": 619.5026245117188,
5674
+ "learning_rate": 1.5337364429696132e-08,
5675
+ "loss": 3.9811,
5676
+ "step": 805
5677
+ },
5678
+ {
5679
+ "epoch": 0.9944478716841456,
5680
+ "grad_norm": 849.2465209960938,
5681
+ "learning_rate": 1.065111391447271e-08,
5682
+ "loss": 4.1834,
5683
+ "step": 806
5684
+ },
5685
+ {
5686
+ "epoch": 0.9956816779765577,
5687
+ "grad_norm": 727.19580078125,
5688
+ "learning_rate": 6.816800036124615e-09,
5689
+ "loss": 4.224,
5690
+ "step": 807
5691
+ },
5692
+ {
5693
+ "epoch": 0.9969154842689698,
5694
+ "grad_norm": 646.8365478515625,
5695
+ "learning_rate": 3.8344881404195345e-09,
5696
+ "loss": 3.9375,
5697
+ "step": 808
5698
+ },
5699
+ {
5700
+ "epoch": 0.9981492905613819,
5701
+ "grad_norm": 956.5570068359375,
5702
+ "learning_rate": 1.7042290529956095e-09,
5703
+ "loss": 4.0443,
5704
+ "step": 809
5705
+ },
5706
+ {
5707
+ "epoch": 0.999383096853794,
5708
+ "grad_norm": 1171.58642578125,
5709
+ "learning_rate": 4.260590785121199e-10,
5710
+ "loss": 4.0195,
5711
+ "step": 810
5712
+ },
5713
+ {
5714
+ "epoch": 1.000616903146206,
5715
+ "grad_norm": 823.234130859375,
5716
+ "learning_rate": 0.0,
5717
+ "loss": 4.1185,
5718
+ "step": 811
5719
  }
5720
  ],
5721
  "logging_steps": 1,
 
5730
  "should_evaluate": false,
5731
  "should_log": false,
5732
  "should_save": true,
5733
+ "should_training_stop": true
5734
  },
5735
  "attributes": {}
5736
  }
5737
  },
5738
+ "total_flos": 5387190162948096.0,
5739
  "train_batch_size": 2,
5740
  "trial_name": null,
5741
  "trial_params": null