kooff11 committed
Commit 3798936 · verified · 1 Parent(s): de8d8f3

Training in progress, step 35, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f54285af820322f413bedf90a197fab9546824b126b677e531d7a3e34609c30c
+oid sha256:5bde0c002a7b60c273b49837e4b851b5ed52b90428c55fb5930c878504512491
 size 167832240
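
Each checkpoint file in this commit is tracked with Git LFS, so the diff only touches the pointer file: a spec version line, the object's sha256 oid, and its byte size. As a minimal sketch (the local path is an assumption about where a copy of the file lives), one way to check that a downloaded file matches the updated pointer:

# Minimal sketch: verify a local LFS object against the pointer fields above.
# The path is hypothetical; the oid and size come from the new pointer.
import hashlib
import os

path = "last-checkpoint/adapter_model.safetensors"
expected_oid = "5bde0c002a7b60c273b49837e4b851b5ed52b90428c55fb5930c878504512491"
expected_size = 167832240

sha = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert sha.hexdigest() == expected_oid, "sha256 mismatch"
print("pointer matches local file")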
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:28c7adf268e87dd0a41446aaa874ecc5ab743fb9a94da50a96ececdd5a3bee8f
+oid sha256:3488476f49083977e86d89a5ee9a6936f17d633cb87e683274b5f9fba011b84a
 size 85723284
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9fc23edab8ba1108a86a5e66e050703f0e0017d14e4377e9cbd25e332b216c4b
+oid sha256:467264eb57c59c955e813f6c6054e4885698642ee168306af423fe1c31108531
 size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4a43d18fc96a7898854e74a08413f3781a39c478df43b8b248b0577bb9030b97
+oid sha256:7aea477e0332bd49137a9e7099a31c4933b9652e14b7c883036946eaf8f98dec
 size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a2bb4b588419b60e1f59dda48eeb3b428275266887083bc814e29c779f54f5c8
+oid sha256:400690333b498d3c3ea4cd1446f834c1cb37202647b1e68e58a488ea7599ce2b
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.7783783783783784,
+  "epoch": 1.009009009009009,
   "eval_steps": 9,
-  "global_step": 27,
+  "global_step": 35,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -228,6 +228,62 @@
       "eval_samples_per_second": 10.197,
       "eval_steps_per_second": 2.571,
       "step": 27
+    },
+    {
+      "epoch": 0.8072072072072072,
+      "grad_norm": 3.502157688140869,
+      "learning_rate": 1.0697345262860636e-05,
+      "loss": 24.1016,
+      "step": 28
+    },
+    {
+      "epoch": 0.836036036036036,
+      "grad_norm": 3.466275930404663,
+      "learning_rate": 7.937323358440935e-06,
+      "loss": 23.6451,
+      "step": 29
+    },
+    {
+      "epoch": 0.8648648648648649,
+      "grad_norm": 3.245530128479004,
+      "learning_rate": 5.558227567253832e-06,
+      "loss": 23.9141,
+      "step": 30
+    },
+    {
+      "epoch": 0.8936936936936937,
+      "grad_norm": 3.150803804397583,
+      "learning_rate": 3.581603349196372e-06,
+      "loss": 23.176,
+      "step": 31
+    },
+    {
+      "epoch": 0.9225225225225225,
+      "grad_norm": 3.4905145168304443,
+      "learning_rate": 2.0253513192751373e-06,
+      "loss": 23.8849,
+      "step": 32
+    },
+    {
+      "epoch": 0.9513513513513514,
+      "grad_norm": 3.366611957550049,
+      "learning_rate": 9.035651368646648e-07,
+      "loss": 23.4103,
+      "step": 33
+    },
+    {
+      "epoch": 0.9801801801801802,
+      "grad_norm": 3.181520700454712,
+      "learning_rate": 2.2640387134577058e-07,
+      "loss": 22.1288,
+      "step": 34
+    },
+    {
+      "epoch": 1.009009009009009,
+      "grad_norm": 3.0065629482269287,
+      "learning_rate": 0.0,
+      "loss": 22.3399,
+      "step": 35
     }
   ],
   "logging_steps": 1,
@@ -242,12 +298,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 6.075038868606812e+17,
+  "total_flos": 7.875050385231053e+17,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null