prxy5608 commited on
Commit
7321cf4
·
verified ·
1 Parent(s): 02dbf3e

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:864af0cc3ce39c8ddf96fb0943cfcb694123fc73c223b009c56e1b9516c05d24
3
  size 191968
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93517d21005be70e4c463b84e7c3e68f8c431eadd1fd10f1058fcc8d328f0fa5
3
  size 191968
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5843e9c3965b9c400f8cdff7a29eca288c1f3812e31f7cc55b243d75d4295414
3
  size 253144
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53d6c512d08cdfcc30f40caab3dc36006165eca1e237c4d6b8f2251cd5911b1f
3
  size 253144
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3fd520aa016ab6b2b4fb3d5f77690bcead8650f76330a74ec630e62e2557831
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:330e5faaa3878010c9991fb905084cef46b2217cbfd5607ef9941420efefa448
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:058219947ff711529dad66039b98666e7f25c784536319d0c623dbf17121adfa
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c535b91583e044007e2d58e4865f12783f75470fac48c23be105e95a5f5108d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 10.300241470336914,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-100",
4
- "epoch": 1.9138755980861244,
5
  "eval_steps": 50,
6
- "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -731,6 +731,364 @@
731
  "eval_samples_per_second": 427.459,
732
  "eval_steps_per_second": 106.865,
733
  "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
734
  }
735
  ],
736
  "logging_steps": 1,
@@ -759,7 +1117,7 @@
759
  "attributes": {}
760
  }
761
  },
762
- "total_flos": 11155066060800.0,
763
  "train_batch_size": 8,
764
  "trial_name": null,
765
  "trial_params": null
 
1
  {
2
+ "best_metric": 10.294066429138184,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
+ "epoch": 2.8708133971291865,
5
  "eval_steps": 50,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
731
  "eval_samples_per_second": 427.459,
732
  "eval_steps_per_second": 106.865,
733
  "step": 100
734
+ },
735
+ {
736
+ "epoch": 1.9330143540669855,
737
+ "grad_norm": 0.14589062333106995,
738
+ "learning_rate": 3.173294878168025e-05,
739
+ "loss": 10.4054,
740
+ "step": 101
741
+ },
742
+ {
743
+ "epoch": 1.9521531100478469,
744
+ "grad_norm": 0.13475301861763,
745
+ "learning_rate": 3.074249318355046e-05,
746
+ "loss": 10.1998,
747
+ "step": 102
748
+ },
749
+ {
750
+ "epoch": 1.9712918660287082,
751
+ "grad_norm": 0.14838005602359772,
752
+ "learning_rate": 2.976083284388031e-05,
753
+ "loss": 10.3857,
754
+ "step": 103
755
+ },
756
+ {
757
+ "epoch": 1.9904306220095693,
758
+ "grad_norm": 0.15033847093582153,
759
+ "learning_rate": 2.8788416105048122e-05,
760
+ "loss": 10.591,
761
+ "step": 104
762
+ },
763
+ {
764
+ "epoch": 2.0095693779904304,
765
+ "grad_norm": 0.22326327860355377,
766
+ "learning_rate": 2.7825687087709328e-05,
767
+ "loss": 18.1498,
768
+ "step": 105
769
+ },
770
+ {
771
+ "epoch": 2.028708133971292,
772
+ "grad_norm": 0.14351670444011688,
773
+ "learning_rate": 2.687308548795825e-05,
774
+ "loss": 9.8884,
775
+ "step": 106
776
+ },
777
+ {
778
+ "epoch": 2.047846889952153,
779
+ "grad_norm": 0.12684540450572968,
780
+ "learning_rate": 2.5931046376510877e-05,
781
+ "loss": 10.2986,
782
+ "step": 107
783
+ },
784
+ {
785
+ "epoch": 2.0669856459330145,
786
+ "grad_norm": 0.14553603529930115,
787
+ "learning_rate": 2.500000000000001e-05,
788
+ "loss": 10.2596,
789
+ "step": 108
790
+ },
791
+ {
792
+ "epoch": 2.0861244019138754,
793
+ "grad_norm": 0.13355287909507751,
794
+ "learning_rate": 2.4080371584473748e-05,
795
+ "loss": 10.2515,
796
+ "step": 109
797
+ },
798
+ {
799
+ "epoch": 2.1052631578947367,
800
+ "grad_norm": 0.1391756683588028,
801
+ "learning_rate": 2.317258114118686e-05,
802
+ "loss": 10.3434,
803
+ "step": 110
804
+ },
805
+ {
806
+ "epoch": 2.124401913875598,
807
+ "grad_norm": 0.1632007211446762,
808
+ "learning_rate": 2.2277043274773857e-05,
809
+ "loss": 10.3774,
810
+ "step": 111
811
+ },
812
+ {
813
+ "epoch": 2.1435406698564594,
814
+ "grad_norm": 0.11006581783294678,
815
+ "learning_rate": 2.139416699389153e-05,
816
+ "loss": 10.237,
817
+ "step": 112
818
+ },
819
+ {
820
+ "epoch": 2.1626794258373208,
821
+ "grad_norm": 0.11960204690694809,
822
+ "learning_rate": 2.0524355524417017e-05,
823
+ "loss": 10.2388,
824
+ "step": 113
825
+ },
826
+ {
827
+ "epoch": 2.1818181818181817,
828
+ "grad_norm": 0.13326919078826904,
829
+ "learning_rate": 1.966800612528723e-05,
830
+ "loss": 10.3773,
831
+ "step": 114
832
+ },
833
+ {
834
+ "epoch": 2.200956937799043,
835
+ "grad_norm": 0.12038934975862503,
836
+ "learning_rate": 1.8825509907063327e-05,
837
+ "loss": 10.2409,
838
+ "step": 115
839
+ },
840
+ {
841
+ "epoch": 2.2200956937799043,
842
+ "grad_norm": 0.15435650944709778,
843
+ "learning_rate": 1.7997251653303248e-05,
844
+ "loss": 10.3346,
845
+ "step": 116
846
+ },
847
+ {
848
+ "epoch": 2.2392344497607657,
849
+ "grad_norm": 0.14944836497306824,
850
+ "learning_rate": 1.7183609644824096e-05,
851
+ "loss": 10.6496,
852
+ "step": 117
853
+ },
854
+ {
855
+ "epoch": 2.258373205741627,
856
+ "grad_norm": 0.12455789744853973,
857
+ "learning_rate": 1.6384955486934156e-05,
858
+ "loss": 10.3332,
859
+ "step": 118
860
+ },
861
+ {
862
+ "epoch": 2.277511961722488,
863
+ "grad_norm": 0.11132438480854034,
864
+ "learning_rate": 1.5601653939714074e-05,
865
+ "loss": 9.8656,
866
+ "step": 119
867
+ },
868
+ {
869
+ "epoch": 2.2966507177033493,
870
+ "grad_norm": 0.13063865900039673,
871
+ "learning_rate": 1.4834062751424015e-05,
872
+ "loss": 10.3357,
873
+ "step": 120
874
+ },
875
+ {
876
+ "epoch": 2.3157894736842106,
877
+ "grad_norm": 0.11636406928300858,
878
+ "learning_rate": 1.4082532495113626e-05,
879
+ "loss": 10.2896,
880
+ "step": 121
881
+ },
882
+ {
883
+ "epoch": 2.334928229665072,
884
+ "grad_norm": 0.1426575481891632,
885
+ "learning_rate": 1.3347406408508695e-05,
886
+ "loss": 10.2307,
887
+ "step": 122
888
+ },
889
+ {
890
+ "epoch": 2.354066985645933,
891
+ "grad_norm": 0.14399978518486023,
892
+ "learning_rate": 1.262902023724824e-05,
893
+ "loss": 10.3345,
894
+ "step": 123
895
+ },
896
+ {
897
+ "epoch": 2.373205741626794,
898
+ "grad_norm": 0.1384967416524887,
899
+ "learning_rate": 1.1927702081543279e-05,
900
+ "loss": 10.3376,
901
+ "step": 124
902
+ },
903
+ {
904
+ "epoch": 2.3923444976076556,
905
+ "grad_norm": 0.10496751964092255,
906
+ "learning_rate": 1.1243772246327416e-05,
907
+ "loss": 10.221,
908
+ "step": 125
909
+ },
910
+ {
911
+ "epoch": 2.411483253588517,
912
+ "grad_norm": 0.13061700761318207,
913
+ "learning_rate": 1.0577543094967612e-05,
914
+ "loss": 10.3965,
915
+ "step": 126
916
+ },
917
+ {
918
+ "epoch": 2.430622009569378,
919
+ "grad_norm": 0.12669506669044495,
920
+ "learning_rate": 9.929318906602175e-06,
921
+ "loss": 10.2988,
922
+ "step": 127
923
+ },
924
+ {
925
+ "epoch": 2.449760765550239,
926
+ "grad_norm": 0.10814495384693146,
927
+ "learning_rate": 9.299395737170757e-06,
928
+ "loss": 10.2391,
929
+ "step": 128
930
+ },
931
+ {
932
+ "epoch": 2.4688995215311005,
933
+ "grad_norm": 0.12308809906244278,
934
+ "learning_rate": 8.688061284200266e-06,
935
+ "loss": 10.328,
936
+ "step": 129
937
+ },
938
+ {
939
+ "epoch": 2.488038277511962,
940
+ "grad_norm": 0.14173907041549683,
941
+ "learning_rate": 8.09559475540797e-06,
942
+ "loss": 10.8135,
943
+ "step": 130
944
+ },
945
+ {
946
+ "epoch": 2.507177033492823,
947
+ "grad_norm": 0.12133615463972092,
948
+ "learning_rate": 7.522266741182305e-06,
949
+ "loss": 10.2866,
950
+ "step": 131
951
+ },
952
+ {
953
+ "epoch": 2.526315789473684,
954
+ "grad_norm": 0.10576584190130234,
955
+ "learning_rate": 6.968339090999187e-06,
956
+ "loss": 9.7405,
957
+ "step": 132
958
+ },
959
+ {
960
+ "epoch": 2.5454545454545454,
961
+ "grad_norm": 0.10492077469825745,
962
+ "learning_rate": 6.43406479383053e-06,
963
+ "loss": 10.3311,
964
+ "step": 133
965
+ },
966
+ {
967
+ "epoch": 2.5645933014354068,
968
+ "grad_norm": 0.13342593610286713,
969
+ "learning_rate": 5.919687862599549e-06,
970
+ "loss": 10.2719,
971
+ "step": 134
972
+ },
973
+ {
974
+ "epoch": 2.583732057416268,
975
+ "grad_norm": 0.12062268704175949,
976
+ "learning_rate": 5.425443222735527e-06,
977
+ "loss": 10.2956,
978
+ "step": 135
979
+ },
980
+ {
981
+ "epoch": 2.6028708133971294,
982
+ "grad_norm": 0.11691385507583618,
983
+ "learning_rate": 4.951556604879048e-06,
984
+ "loss": 10.4012,
985
+ "step": 136
986
+ },
987
+ {
988
+ "epoch": 2.6220095693779903,
989
+ "grad_norm": 0.1241801381111145,
990
+ "learning_rate": 4.498244441786675e-06,
991
+ "loss": 10.2174,
992
+ "step": 137
993
+ },
994
+ {
995
+ "epoch": 2.6411483253588517,
996
+ "grad_norm": 0.09667439758777618,
997
+ "learning_rate": 4.065713769482082e-06,
998
+ "loss": 10.2467,
999
+ "step": 138
1000
+ },
1001
+ {
1002
+ "epoch": 2.660287081339713,
1003
+ "grad_norm": 0.09752865880727768,
1004
+ "learning_rate": 3.654162132698918e-06,
1005
+ "loss": 10.3165,
1006
+ "step": 139
1007
+ },
1008
+ {
1009
+ "epoch": 2.679425837320574,
1010
+ "grad_norm": 0.1245708093047142,
1011
+ "learning_rate": 3.2637774946584486e-06,
1012
+ "loss": 10.3543,
1013
+ "step": 140
1014
+ },
1015
+ {
1016
+ "epoch": 2.6985645933014353,
1017
+ "grad_norm": 0.12063057720661163,
1018
+ "learning_rate": 2.894738151223331e-06,
1019
+ "loss": 10.2164,
1020
+ "step": 141
1021
+ },
1022
+ {
1023
+ "epoch": 2.7177033492822966,
1024
+ "grad_norm": 0.14080384373664856,
1025
+ "learning_rate": 2.547212649466568e-06,
1026
+ "loss": 10.3849,
1027
+ "step": 142
1028
+ },
1029
+ {
1030
+ "epoch": 2.736842105263158,
1031
+ "grad_norm": 0.13528694212436676,
1032
+ "learning_rate": 2.221359710692961e-06,
1033
+ "loss": 10.7423,
1034
+ "step": 143
1035
+ },
1036
+ {
1037
+ "epoch": 2.7559808612440193,
1038
+ "grad_norm": 0.10822786390781403,
1039
+ "learning_rate": 1.9173281579481892e-06,
1040
+ "loss": 10.0585,
1041
+ "step": 144
1042
+ },
1043
+ {
1044
+ "epoch": 2.77511961722488,
1045
+ "grad_norm": 0.10477904230356216,
1046
+ "learning_rate": 1.6352568480485276e-06,
1047
+ "loss": 10.0791,
1048
+ "step": 145
1049
+ },
1050
+ {
1051
+ "epoch": 2.7942583732057416,
1052
+ "grad_norm": 0.1134805977344513,
1053
+ "learning_rate": 1.3752746081624467e-06,
1054
+ "loss": 10.2513,
1055
+ "step": 146
1056
+ },
1057
+ {
1058
+ "epoch": 2.813397129186603,
1059
+ "grad_norm": 0.14501194655895233,
1060
+ "learning_rate": 1.1375001769727999e-06,
1061
+ "loss": 10.2943,
1062
+ "step": 147
1063
+ },
1064
+ {
1065
+ "epoch": 2.8325358851674642,
1066
+ "grad_norm": 0.11421407014131546,
1067
+ "learning_rate": 9.220421504467281e-07,
1068
+ "loss": 10.2723,
1069
+ "step": 148
1070
+ },
1071
+ {
1072
+ "epoch": 2.8516746411483256,
1073
+ "grad_norm": 0.12981349229812622,
1074
+ "learning_rate": 7.289989322378732e-07,
1075
+ "loss": 10.4036,
1076
+ "step": 149
1077
+ },
1078
+ {
1079
+ "epoch": 2.8708133971291865,
1080
+ "grad_norm": 0.1481410712003708,
1081
+ "learning_rate": 5.584586887435739e-07,
1082
+ "loss": 10.2066,
1083
+ "step": 150
1084
+ },
1085
+ {
1086
+ "epoch": 2.8708133971291865,
1087
+ "eval_loss": 10.294066429138184,
1088
+ "eval_runtime": 0.2055,
1089
+ "eval_samples_per_second": 428.249,
1090
+ "eval_steps_per_second": 107.062,
1091
+ "step": 150
1092
  }
1093
  ],
1094
  "logging_steps": 1,
 
1117
  "attributes": {}
1118
  }
1119
  },
1120
+ "total_flos": 16732599091200.0,
1121
  "train_batch_size": 8,
1122
  "trial_name": null,
1123
  "trial_params": null