fats-fme committed on
Commit 1d97221 · verified · 1 Parent(s): 9eb6f0c

Training in progress, step 138, checkpoint

last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:72d932876dc769bf93f82b0534690ecaf491a27dd3494f4b390be0317f04d933
+ oid sha256:d64006a8240d0814491b2db4b937e17fb7b606088a33ab275a336effb5b52496
  size 335922386
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5c2106bd7c77f119bfbd0e7216f6b8076246c5b56169d9758a5766704f7fd3ac
+ oid sha256:4ac89b50eb49875d4fc6320c442b1f1a2bb0c6ca5dcf4534babea7e4fa581fbf
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:d552cf0ac1340b257e68c2f09f146d8a7526b80238d90cb501f4380a5acaac65
+ oid sha256:aa2482d7eb8b9907f50055efed6d979a680e476b4380cec06a223fb30358eb52
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:9263945a6777ffd183084f656dbf9a8ade54f242aec4e02deae1e6e4a03b7dfd
+ oid sha256:05243bc9418b5d027b9cd58d0b804f8898dee9480e9cd6d09120cb4b16d4e2f3
  size 1064
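
The four checkpoint tensors above (optimizer.pt, rng_state_0.pth, rng_state_1.pth, scheduler.pt) are tracked with Git LFS, so the diff only touches their pointer files: the sha256 oid changes while the recorded byte size stays the same. A minimal sketch (not part of this commit; paths are hypothetical) of checking a downloaded blob against its pointer:

```python
# Illustrative only: verify a downloaded LFS object against the oid/size
# recorded in its pointer file. File paths below are hypothetical examples.
import hashlib
from pathlib import Path


def read_pointer(pointer_path: str) -> dict:
    """Parse a Git LFS pointer file ('key value' lines) into a dict."""
    fields = {}
    for line in Path(pointer_path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields


def verify_blob(pointer_path: str, blob_path: str) -> bool:
    """Check that the blob's sha256 digest and size match the pointer."""
    fields = read_pointer(pointer_path)
    expected_oid = fields["oid"].split(":", 1)[1]   # strip the 'sha256:' prefix
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return (hashlib.sha256(data).hexdigest() == expected_oid
            and len(data) == expected_size)


# Hypothetical usage:
# verify_blob("last-checkpoint/optimizer.pt.pointer", "last-checkpoint/optimizer.pt")
```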
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.25125170687300863,
+ "epoch": 0.5025034137460173,
  "eval_steps": 69,
- "global_step": 69,
+ "global_step": 138,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -506,6 +506,497 @@
  "eval_samples_per_second": 8.365,
  "eval_steps_per_second": 2.096,
  "step": 69
+ },
+ {
+ "epoch": 0.25489303595812474,
+ "grad_norm": NaN,
+ "learning_rate": 0.00019609173219450998,
+ "loss": 0.0,
+ "step": 70
+ },
+ {
+ "epoch": 0.2585343650432408,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001956940335732209,
+ "loss": 0.0,
+ "step": 71
+ },
+ {
+ "epoch": 0.26217569412835684,
+ "grad_norm": NaN,
+ "learning_rate": 0.00019527751227228963,
+ "loss": 0.0,
+ "step": 72
+ },
+ {
+ "epoch": 0.2658170232134729,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001948422502199903,
+ "loss": 0.0,
+ "step": 73
+ },
+ {
+ "epoch": 0.269458352298589,
+ "grad_norm": NaN,
+ "learning_rate": 0.00019438833303083678,
+ "loss": 0.0,
+ "step": 74
+ },
+ {
+ "epoch": 0.27309968138370505,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001939158499887428,
+ "loss": 0.0,
+ "step": 75
+ },
+ {
+ "epoch": 0.2767410104688211,
+ "grad_norm": NaN,
+ "learning_rate": 0.00019342489402945998,
+ "loss": 0.0,
+ "step": 76
+ },
+ {
+ "epoch": 0.2803823395539372,
+ "grad_norm": NaN,
+ "learning_rate": 0.00019291556172229785,
+ "loss": 0.0,
+ "step": 77
+ },
+ {
+ "epoch": 0.28402366863905326,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001923879532511287,
+ "loss": 0.0,
+ "step": 78
+ },
+ {
+ "epoch": 0.2876649977241693,
+ "grad_norm": NaN,
+ "learning_rate": 0.00019184217239468212,
+ "loss": 0.0,
+ "step": 79
+ },
+ {
+ "epoch": 0.29130632680928537,
+ "grad_norm": NaN,
+ "learning_rate": 0.00019127832650613189,
+ "loss": 0.0,
+ "step": 80
+ },
+ {
+ "epoch": 0.2949476558944015,
+ "grad_norm": NaN,
+ "learning_rate": 0.00019069652649198005,
+ "loss": 0.0,
+ "step": 81
+ },
+ {
+ "epoch": 0.2985889849795175,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001900968867902419,
+ "loss": 0.0,
+ "step": 82
+ },
+ {
+ "epoch": 0.3022303140646336,
+ "grad_norm": NaN,
+ "learning_rate": 0.00018947952534793661,
+ "loss": 0.0,
+ "step": 83
+ },
+ {
+ "epoch": 0.3058716431497497,
+ "grad_norm": NaN,
+ "learning_rate": 0.00018884456359788724,
+ "loss": 0.0,
+ "step": 84
+ },
+ {
+ "epoch": 0.30951297223486574,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001881921264348355,
+ "loss": 0.0,
+ "step": 85
+ },
+ {
+ "epoch": 0.3131543013199818,
+ "grad_norm": NaN,
+ "learning_rate": 0.00018752234219087538,
+ "loss": 0.0,
+ "step": 86
+ },
+ {
+ "epoch": 0.31679563040509784,
+ "grad_norm": NaN,
+ "learning_rate": 0.00018683534261021057,
+ "loss": 0.0,
+ "step": 87
+ },
+ {
+ "epoch": 0.32043695949021395,
+ "grad_norm": NaN,
+ "learning_rate": 0.00018613126282324092,
+ "loss": 0.0,
+ "step": 88
+ },
+ {
+ "epoch": 0.32407828857533,
+ "grad_norm": NaN,
+ "learning_rate": 0.00018541024131998274,
+ "loss": 0.0,
+ "step": 89
+ },
+ {
+ "epoch": 0.32771961766044605,
+ "grad_norm": NaN,
+ "learning_rate": 0.00018467241992282843,
+ "loss": 0.0,
+ "step": 90
+ },
+ {
+ "epoch": 0.33136094674556216,
+ "grad_norm": NaN,
+ "learning_rate": 0.00018391794375865024,
+ "loss": 0.0,
+ "step": 91
+ },
+ {
+ "epoch": 0.3350022758306782,
+ "grad_norm": NaN,
+ "learning_rate": 0.00018314696123025454,
+ "loss": 0.0,
+ "step": 92
+ },
+ {
+ "epoch": 0.33864360491579426,
+ "grad_norm": NaN,
+ "learning_rate": 0.00018235962398719147,
+ "loss": 0.0,
+ "step": 93
+ },
+ {
+ "epoch": 0.3422849340009103,
+ "grad_norm": NaN,
+ "learning_rate": 0.00018155608689592604,
+ "loss": 0.0,
+ "step": 94
+ },
+ {
+ "epoch": 0.3459262630860264,
+ "grad_norm": NaN,
+ "learning_rate": 0.00018073650800937624,
+ "loss": 0.0,
+ "step": 95
+ },
+ {
+ "epoch": 0.34956759217114247,
+ "grad_norm": NaN,
+ "learning_rate": 0.00017990104853582493,
+ "loss": 0.0,
+ "step": 96
+ },
+ {
+ "epoch": 0.3532089212562585,
+ "grad_norm": NaN,
+ "learning_rate": 0.00017904987280721035,
+ "loss": 0.0,
+ "step": 97
+ },
+ {
+ "epoch": 0.3568502503413746,
+ "grad_norm": NaN,
+ "learning_rate": 0.000178183148246803,
+ "loss": 0.0,
+ "step": 98
+ },
+ {
+ "epoch": 0.3604915794264907,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001773010453362737,
+ "loss": 0.0,
+ "step": 99
+ },
+ {
+ "epoch": 0.36413290851160673,
+ "grad_norm": NaN,
+ "learning_rate": 0.00017640373758216077,
+ "loss": 0.0,
+ "step": 100
+ },
+ {
+ "epoch": 0.3677742375967228,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001754914014817416,
+ "loss": 0.0,
+ "step": 101
+ },
+ {
+ "epoch": 0.3714155666818389,
+ "grad_norm": NaN,
+ "learning_rate": 0.00017456421648831655,
+ "loss": 0.0,
+ "step": 102
+ },
+ {
+ "epoch": 0.37505689576695495,
+ "grad_norm": NaN,
+ "learning_rate": 0.00017362236497591094,
+ "loss": 0.0,
+ "step": 103
+ },
+ {
+ "epoch": 0.378698224852071,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001726660322034027,
+ "loss": 0.0,
+ "step": 104
+ },
+ {
+ "epoch": 0.38233955393718705,
+ "grad_norm": NaN,
+ "learning_rate": 0.00017169540627808274,
+ "loss": 0.0,
+ "step": 105
+ },
+ {
+ "epoch": 0.38598088302230316,
+ "grad_norm": NaN,
+ "learning_rate": 0.00017071067811865476,
+ "loss": 0.0,
+ "step": 106
+ },
+ {
+ "epoch": 0.3896222121074192,
+ "grad_norm": NaN,
+ "learning_rate": 0.00016971204141768233,
+ "loss": 0.0,
+ "step": 107
+ },
+ {
+ "epoch": 0.39326354119253526,
+ "grad_norm": NaN,
+ "learning_rate": 0.00016869969260349018,
+ "loss": 0.0,
+ "step": 108
+ },
+ {
+ "epoch": 0.39690487027765137,
+ "grad_norm": NaN,
+ "learning_rate": 0.00016767383080152742,
+ "loss": 0.0,
+ "step": 109
+ },
+ {
+ "epoch": 0.4005461993627674,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001666346577952004,
+ "loss": 0.0,
+ "step": 110
+ },
+ {
+ "epoch": 0.40418752844788347,
+ "grad_norm": NaN,
+ "learning_rate": 0.00016558237798618245,
+ "loss": 0.0,
+ "step": 111
+ },
+ {
+ "epoch": 0.4078288575329995,
+ "grad_norm": NaN,
+ "learning_rate": 0.00016451719835420877,
+ "loss": 0.0,
+ "step": 112
+ },
+ {
+ "epoch": 0.41147018661811563,
+ "grad_norm": NaN,
+ "learning_rate": 0.00016343932841636456,
+ "loss": 0.0,
+ "step": 113
+ },
+ {
+ "epoch": 0.4151115157032317,
+ "grad_norm": NaN,
+ "learning_rate": 0.00016234898018587337,
+ "loss": 0.0,
+ "step": 114
+ },
+ {
+ "epoch": 0.41875284478834773,
+ "grad_norm": NaN,
+ "learning_rate": 0.00016124636813039502,
+ "loss": 0.0,
+ "step": 115
+ },
+ {
+ "epoch": 0.42239417387346384,
+ "grad_norm": NaN,
+ "learning_rate": 0.00016013170912984058,
+ "loss": 0.0,
+ "step": 116
+ },
+ {
+ "epoch": 0.4260355029585799,
+ "grad_norm": NaN,
+ "learning_rate": 0.00015900522243371282,
+ "loss": 0.0,
+ "step": 117
+ },
+ {
+ "epoch": 0.42967683204369594,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001578671296179806,
+ "loss": 0.0,
+ "step": 118
+ },
+ {
+ "epoch": 0.433318161128812,
+ "grad_norm": NaN,
+ "learning_rate": 0.00015671765454149559,
+ "loss": 0.0,
+ "step": 119
+ },
+ {
+ "epoch": 0.4369594902139281,
+ "grad_norm": NaN,
+ "learning_rate": 0.00015555702330196023,
+ "loss": 0.0,
+ "step": 120
+ },
+ {
+ "epoch": 0.44060081929904416,
+ "grad_norm": NaN,
+ "learning_rate": 0.00015438546419145488,
+ "loss": 0.0,
+ "step": 121
+ },
+ {
+ "epoch": 0.4442421483841602,
+ "grad_norm": NaN,
+ "learning_rate": 0.00015320320765153367,
+ "loss": 0.0,
+ "step": 122
+ },
+ {
+ "epoch": 0.44788347746927626,
+ "grad_norm": NaN,
+ "learning_rate": 0.00015201048622789747,
+ "loss": 0.0,
+ "step": 123
+ },
+ {
+ "epoch": 0.45152480655439237,
+ "grad_norm": NaN,
+ "learning_rate": 0.00015080753452465296,
+ "loss": 0.0,
+ "step": 124
+ },
+ {
+ "epoch": 0.4551661356395084,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001495945891581668,
+ "loss": 0.0,
+ "step": 125
+ },
+ {
+ "epoch": 0.45880746472462447,
+ "grad_norm": NaN,
+ "learning_rate": 0.000148371888710524,
+ "loss": 0.0,
+ "step": 126
+ },
+ {
+ "epoch": 0.4624487938097406,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001471396736825998,
+ "loss": 0.0,
+ "step": 127
+ },
+ {
+ "epoch": 0.46609012289485663,
+ "grad_norm": NaN,
+ "learning_rate": 0.00014589818644675378,
+ "loss": 0.0,
+ "step": 128
+ },
+ {
+ "epoch": 0.4697314519799727,
+ "grad_norm": NaN,
+ "learning_rate": 0.00014464767119915629,
+ "loss": 0.0,
+ "step": 129
+ },
+ {
+ "epoch": 0.47337278106508873,
+ "grad_norm": NaN,
+ "learning_rate": 0.00014338837391175582,
+ "loss": 0.0,
+ "step": 130
+ },
+ {
+ "epoch": 0.47701411015020484,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001421205422838971,
+ "loss": 0.0,
+ "step": 131
+ },
+ {
+ "epoch": 0.4806554392353209,
+ "grad_norm": NaN,
+ "learning_rate": 0.00014084442569359964,
+ "loss": 0.0,
+ "step": 132
+ },
+ {
+ "epoch": 0.48429676832043694,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001395602751485059,
+ "loss": 0.0,
+ "step": 133
+ },
+ {
+ "epoch": 0.48793809740555305,
+ "grad_norm": NaN,
+ "learning_rate": 0.000138268343236509,
+ "loss": 0.0,
+ "step": 134
+ },
+ {
+ "epoch": 0.4915794264906691,
+ "grad_norm": NaN,
+ "learning_rate": 0.00013696888407606952,
+ "loss": 0.0,
+ "step": 135
+ },
+ {
+ "epoch": 0.49522075557578515,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001356621532662313,
+ "loss": 0.0,
+ "step": 136
+ },
+ {
+ "epoch": 0.4988620846609012,
+ "grad_norm": NaN,
+ "learning_rate": 0.0001343484078363461,
+ "loss": 0.0,
+ "step": 137
+ },
+ {
+ "epoch": 0.5025034137460173,
+ "grad_norm": NaN,
+ "learning_rate": 0.00013302790619551674,
+ "loss": 0.0,
+ "step": 138
+ },
+ {
+ "epoch": 0.5025034137460173,
+ "eval_loss": NaN,
+ "eval_runtime": 55.2669,
+ "eval_samples_per_second": 8.378,
+ "eval_steps_per_second": 2.099,
+ "step": 138
  }
  ],
  "logging_steps": 1,
@@ -525,7 +1016,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.940639639006085e+17,
+ "total_flos": 3.88127927801217e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null