besimray committed
Commit 59bed63 · verified · 1 Parent(s): 3fa9b1a

Training in progress, step 100, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f9d3a455b48f6346461c9f928d1610c220bf18f1cefcc0e15177b76d372a6b4
+oid sha256:6a79d6468f9a8226984a35f278d2afc4996fc80bf229f9e9f4696b88b0244d70
 size 335604696
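
Note: the checkpoint binaries in this commit are tracked with Git LFS, so each diff only touches the pointer file; the "oid sha256:" line is the hash of the actual blob. A minimal sketch for verifying a downloaded file against the new pointer follows (the local path is an assumption, not part of this commit):

import hashlib

# Minimal sketch: check that a locally downloaded checkpoint file matches
# the sha256 oid recorded in its Git LFS pointer (local path assumed).
def sha256_of(path, chunk_size=1 << 20):
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        while chunk := f.read(chunk_size):
            digest.update(chunk)
    return digest.hexdigest()

expected = "6a79d6468f9a8226984a35f278d2afc4996fc80bf229f9e9f4696b88b0244d70"
print(sha256_of("last-checkpoint/adapter_model.safetensors") == expected)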
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fd021e5e05e7646b803bdd3cb4aca11c66b541e5f7ae4070bb9dd51104cce573
+oid sha256:eed3ef08c89e9a07cd3218a687a41dd717cf80833936a581ab6db0cd1f2aabdf
 size 170920084
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9be6a0bd48f6bc89b56368d3add1033bdc63312da525cee5e56fe50f3a009c4
+oid sha256:6fa0933e237e365bbcbd10bdc7014a011ec526d46395768542a06f4bd5bcda7b
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23b27ab0ae2b9af6f3d4c84cdaf8b0fc887acf71f8f726b270a3bce2845000a9
+oid sha256:d5c84ec0ff3c8c6aa13b25568668096db118f67ce80a9fa015a625446606f15d
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.2894105911254883,
   "best_model_checkpoint": "miner_id_24/checkpoint-80",
-  "epoch": 0.025414754677020826,
+  "epoch": 0.028238616307800918,
   "eval_steps": 10,
-  "global_step": 90,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -717,6 +717,84 @@
       "eval_samples_per_second": 1.774,
       "eval_steps_per_second": 1.774,
       "step": 90
+    },
+    {
+      "epoch": 0.025697140840098835,
+      "grad_norm": 0.922839343547821,
+      "learning_rate": 0.00018681546242521786,
+      "loss": 2.2968,
+      "step": 91
+    },
+    {
+      "epoch": 0.025979527003176843,
+      "grad_norm": 0.9457669258117676,
+      "learning_rate": 0.00018649548579446936,
+      "loss": 1.5159,
+      "step": 92
+    },
+    {
+      "epoch": 0.026261913166254855,
+      "grad_norm": 1.3212480545043945,
+      "learning_rate": 0.0001861719536730795,
+      "loss": 1.8084,
+      "step": 93
+    },
+    {
+      "epoch": 0.026544299329332863,
+      "grad_norm": 1.1744962930679321,
+      "learning_rate": 0.00018584487936018661,
+      "loss": 2.3983,
+      "step": 94
+    },
+    {
+      "epoch": 0.026826685492410872,
+      "grad_norm": 0.9709725379943848,
+      "learning_rate": 0.00018551427630053463,
+      "loss": 1.8034,
+      "step": 95
+    },
+    {
+      "epoch": 0.02710907165548888,
+      "grad_norm": 0.878976047039032,
+      "learning_rate": 0.00018518015808392045,
+      "loss": 1.2276,
+      "step": 96
+    },
+    {
+      "epoch": 0.02739145781856689,
+      "grad_norm": 1.220984935760498,
+      "learning_rate": 0.00018484253844463526,
+      "loss": 1.0919,
+      "step": 97
+    },
+    {
+      "epoch": 0.0276738439816449,
+      "grad_norm": 0.41714727878570557,
+      "learning_rate": 0.00018450143126090015,
+      "loss": 1.0902,
+      "step": 98
+    },
+    {
+      "epoch": 0.02795623014472291,
+      "grad_norm": 0.9148246049880981,
+      "learning_rate": 0.00018415685055429533,
+      "loss": 2.1429,
+      "step": 99
+    },
+    {
+      "epoch": 0.028238616307800918,
+      "grad_norm": 0.8029168844223022,
+      "learning_rate": 0.00018380881048918405,
+      "loss": 1.5813,
+      "step": 100
+    },
+    {
+      "epoch": 0.028238616307800918,
+      "eval_loss": 1.3071253299713135,
+      "eval_runtime": 421.1658,
+      "eval_samples_per_second": 1.771,
+      "eval_steps_per_second": 1.771,
+      "step": 100
     }
   ],
   "logging_steps": 1,
@@ -731,7 +809,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter": 1
+        "early_stopping_patience_counter": 2
       }
     },
     "TrainerControl": {
@@ -745,7 +823,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.714094069481472e+16,
+  "total_flos": 7.46010452164608e+16,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null