masatochi committed
Commit 549146b · verified · 1 Parent(s): 8f11ace

Training in progress, step 110, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6f211ae62887dce0fa42817d05e0aabed7b171367db2fd132779e56a4899860e
+oid sha256:8557a1dc450e339c881c0652db2b6d90b2b501380a19f24a9f51e39ce445defe
 size 83945296
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:82b28abe9afb6f0a471ca1628e1779cb9272b993bee79979db7696d79f8a646d
+oid sha256:8ac0f74b16d60fb235f73decc69c05ba931afc139f20893d0c63b85d240c73ab
 size 43122580
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d228656fc4662fbb158acab12ab048938d5ec4032384bd70b245c74cf2162ee1
+oid sha256:3a4992a0a541377e74f550ef72fa9af6f8fee0ce175cbdc61ec06b94024e86a4
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e8c855c846898181ed358c1ef65b19ad1435172d9025fde7f25f4580bfc48faa
+oid sha256:802e09b6cc63e64e726d0b68ba37b81d6a6fcf54cdf00e4821b3e38426a8a5c4
 size 1064
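The four binary checkpoint files above are stored as Git LFS pointers, so the diff only shows their oid sha256 and size fields changing. Purely as an illustration (not part of this repository), a downloaded copy can be checked against its pointer with a short script; the local path used below is an assumption for the example.

import hashlib
import os

def verify_lfs_pointer(pointer_text: str, local_path: str) -> bool:
    """Compare a local file against the oid/size recorded in a Git LFS pointer."""
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    # Hash the file in chunks so large checkpoints are not loaded into RAM at once.
    digest = hashlib.sha256()
    with open(local_path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)

    return digest.hexdigest() == expected_oid and os.path.getsize(local_path) == expected_size

# Example using the new pointer values from this commit; the local path is hypothetical.
pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:8557a1dc450e339c881c0652db2b6d90b2b501380a19f24a9f51e39ce445defe
size 83945296"""
print(verify_lfs_pointer(pointer, "last-checkpoint/adapter_model.safetensors"))

Since last-checkpoint/ also carries optimizer.pt, scheduler.pt, and rng_state.pth, it matches the directory layout that transformers' Trainer writes, so training can presumably be resumed by pointing trainer.train(resume_from_checkpoint="last-checkpoint") at it.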
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.051347881899871634,
+  "epoch": 0.05379301913319885,
   "eval_steps": 34,
-  "global_step": 105,
+  "global_step": 110,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -774,6 +774,41 @@
       "learning_rate": 0.00011837495178165706,
       "loss": 1.0653,
       "step": 105
+    },
+    {
+      "epoch": 0.05183690934653708,
+      "grad_norm": 0.28122061491012573,
+      "learning_rate": 0.000116555387618413,
+      "loss": 1.0055,
+      "step": 106
+    },
+    {
+      "epoch": 0.05232593679320252,
+      "grad_norm": 0.5028234720230103,
+      "learning_rate": 0.00011473016980546377,
+      "loss": 0.9866,
+      "step": 107
+    },
+    {
+      "epoch": 0.052814964239867965,
+      "grad_norm": 0.3072775602340698,
+      "learning_rate": 0.00011289992165302035,
+      "loss": 1.1557,
+      "step": 108
+    },
+    {
+      "epoch": 0.05330399168653341,
+      "grad_norm": 0.2929151654243469,
+      "learning_rate": 0.00011106526818915008,
+      "loss": 1.04,
+      "step": 109
+    },
+    {
+      "epoch": 0.05379301913319885,
+      "grad_norm": 0.35853561758995056,
+      "learning_rate": 0.00010922683594633021,
+      "loss": 1.0056,
+      "step": 110
     }
   ],
   "logging_steps": 1,
@@ -793,7 +828,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.6609019053277184e+17,
+  "total_flos": 4.882849615105229e+17,
   "train_batch_size": 3,
   "trial_name": null,
   "trial_params": null