kooff11 committed on
Commit
ef56c99
·
verified ·
1 Parent(s): ef65dc6

Training in progress, step 27, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d66c403cdd318e9ea464acc3be84788a49b39f3efcac2737e1c82743e5fb752
3
  size 82460660
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcad08300ab33773e7842beae8bc97e3b8f46b8304b3c34b61ed6271564656cb
3
  size 82460660
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:20a29dde21dfe173516419b977db3c1775d1cd4ba8f6bd6e1a59919a4f0685ef
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26e775ec5d2f7a6bfc6850d2e1aad7fc89c1cf1b52e210dfc0be05b8df7fabb4
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cd3306f67ab1d184a941a93922d03608c525054b65e45368eab572e7e3d4940
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00dd6870748d6f432ed334cbec439196364896117466a0420ceec28dd8a722a6
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0eef01f601d0942e2858e183d109e520edd8d27fcbc4627e3dd5104e8484712
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f4cb172acc239bdfcb1321474587dd4513dd004b0281034cdef9d5bfde0d5ff
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7777777777777778,
5
  "eval_steps": 7,
6
- "global_step": 21,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -186,6 +186,48 @@
186
  "eval_samples_per_second": 10.065,
187
  "eval_steps_per_second": 2.544,
188
  "step": 21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
189
  }
190
  ],
191
  "logging_steps": 1,
@@ -200,12 +242,12 @@
200
  "should_evaluate": false,
201
  "should_log": false,
202
  "should_save": true,
203
- "should_training_stop": false
204
  },
205
  "attributes": {}
206
  }
207
  },
208
- "total_flos": 4.697539906413527e+17,
209
  "train_batch_size": 2,
210
  "trial_name": null,
211
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
  "eval_steps": 7,
6
+ "global_step": 27,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
186
  "eval_samples_per_second": 10.065,
187
  "eval_steps_per_second": 2.544,
188
  "step": 21
189
+ },
190
+ {
191
+ "epoch": 0.8148148148148148,
192
+ "grad_norm": NaN,
193
+ "learning_rate": 9.549150281252633e-06,
194
+ "loss": 0.0,
195
+ "step": 22
196
+ },
197
+ {
198
+ "epoch": 0.8518518518518519,
199
+ "grad_norm": NaN,
200
+ "learning_rate": 6.184665997806832e-06,
201
+ "loss": 0.0,
202
+ "step": 23
203
+ },
204
+ {
205
+ "epoch": 0.8888888888888888,
206
+ "grad_norm": NaN,
207
+ "learning_rate": 3.511175705587433e-06,
208
+ "loss": 0.0,
209
+ "step": 24
210
+ },
211
+ {
212
+ "epoch": 0.9259259259259259,
213
+ "grad_norm": NaN,
214
+ "learning_rate": 1.5708419435684462e-06,
215
+ "loss": 0.0,
216
+ "step": 25
217
+ },
218
+ {
219
+ "epoch": 0.9629629629629629,
220
+ "grad_norm": NaN,
221
+ "learning_rate": 3.9426493427611177e-07,
222
+ "loss": 0.0,
223
+ "step": 26
224
+ },
225
+ {
226
+ "epoch": 1.0,
227
+ "grad_norm": NaN,
228
+ "learning_rate": 0.0,
229
+ "loss": 0.0,
230
+ "step": 27
231
  }
232
  ],
233
  "logging_steps": 1,
 
242
  "should_evaluate": false,
243
  "should_log": false,
244
  "should_save": true,
245
+ "should_training_stop": true
246
  },
247
  "attributes": {}
248
  }
249
  },
250
+ "total_flos": 6.03969416538882e+17,
251
  "train_batch_size": 2,
252
  "trial_name": null,
253
  "trial_params": null