Rakhman16 committed on
Commit
329a611
·
verified ·
1 Parent(s): cc2bbcf

Training in progress, step 5692, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2aa3dfa629d6f0a4b00e9aa0db2fda3e9ab1e4c509f72b23ef5457288b2e474f
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3282a74e588d167af885979217f2163838ce9140b961d4759ca970b8f45a34f8
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd9c7ab2e3359d64280354b30eccac6666a4328bc54eac0eee212a29f2aaf43c
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d991a87c3c47037577ad22c7ff6d19f18ff612aa04b6016ab49561b700922f83
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a194bef12ceeecc6ad10e9d032a837c1a3c1db13b2c2253686b43518ae42503
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a37fbc263c505f02f6ce9dcccc6ed9c167c85ba9bd4249a320a5f924d43f35b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f66a57bd394806719fb22948b621f468b035d0a2c7c49f5f8b90d526b96103c4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4ca5c4e853eca5507dffbf1162373b8e51b2a8cdc79c4e01dce5ddf18b3fd7c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.20129592716693878,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-5500",
4
- "epoch": 3.8637161924833157,
5
  "eval_steps": 100,
6
- "global_step": 5500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1217,6 +1217,35 @@
1217
  "eval_samples_per_second": 66.382,
1218
  "eval_steps_per_second": 2.084,
1219
  "step": 5500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1220
  }
1221
  ],
1222
  "logging_steps": 50,
@@ -1231,12 +1260,12 @@
1231
  "should_evaluate": false,
1232
  "should_log": false,
1233
  "should_save": true,
1234
- "should_training_stop": false
1235
  },
1236
  "attributes": {}
1237
  }
1238
  },
1239
- "total_flos": 5.35814435930112e+16,
1240
  "train_batch_size": 32,
1241
  "trial_name": null,
1242
  "trial_params": null
 
1
  {
2
  "best_metric": 0.20129592716693878,
3
  "best_model_checkpoint": "./fine-tuned/checkpoint-5500",
4
+ "epoch": 3.9985950122936424,
5
  "eval_steps": 100,
6
+ "global_step": 5692,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1217
  "eval_samples_per_second": 66.382,
1218
  "eval_steps_per_second": 2.084,
1219
  "step": 5500
1220
+ },
1221
+ {
1222
+ "epoch": 3.898840885142255,
1223
+ "grad_norm": 22639.22265625,
1224
+ "learning_rate": 7.484188334504568e-07,
1225
+ "loss": 0.1898,
1226
+ "step": 5550
1227
+ },
1228
+ {
1229
+ "epoch": 3.933965577801194,
1230
+ "grad_norm": 108627.9453125,
1231
+ "learning_rate": 4.848910751932538e-07,
1232
+ "loss": 0.1887,
1233
+ "step": 5600
1234
+ },
1235
+ {
1236
+ "epoch": 3.933965577801194,
1237
+ "eval_loss": 0.2013484090566635,
1238
+ "eval_runtime": 67.1981,
1239
+ "eval_samples_per_second": 66.371,
1240
+ "eval_steps_per_second": 2.083,
1241
+ "step": 5600
1242
+ },
1243
+ {
1244
+ "epoch": 3.9690902704601334,
1245
+ "grad_norm": 28155.427734375,
1246
+ "learning_rate": 2.213633169360506e-07,
1247
+ "loss": 0.1955,
1248
+ "step": 5650
1249
  }
1250
  ],
1251
  "logging_steps": 50,
 
1260
  "should_evaluate": false,
1261
  "should_log": false,
1262
  "should_save": true,
1263
+ "should_training_stop": true
1264
  },
1265
  "attributes": {}
1266
  }
1267
  },
1268
+ "total_flos": 5.545216223281152e+16,
1269
  "train_batch_size": 32,
1270
  "trial_name": null,
1271
  "trial_params": null