Commit 19cef52 (verified) by ben81828 · Parent: 4873a52

Training in progress, step 2400, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d159d3ba185322d9f83c53738ceab5e5c90742b79ccfaf680796d68d03ad7724
+oid sha256:e9d5ec3c39553916ddeed05ebc73c5ecc3358eb275ee803834c3cbefd37b2102
 size 29034840
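
Note: the adapter weights are stored as a Git LFS pointer, so only the oid (the SHA-256 of the blob) changes between checkpoints; the size stays at 29034840 bytes. A minimal sketch, assuming the file has been pulled locally under last-checkpoint/, for checking a downloaded blob against the new oid:

# Minimal sketch (local path is an assumption): the LFS oid above is the
# SHA-256 of the file contents, so a pulled copy can be verified directly.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "e9d5ec3c39553916ddeed05ebc73c5ecc3358eb275ee803834c3cbefd37b2102"
print(sha256_of("last-checkpoint/adapter_model.safetensors") == expected)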
last-checkpoint/global_step2399/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4577996b31a6b8a56e9904449dc510f2e89b1a976d452770e5e328bc6a0f199
+size 43429616
last-checkpoint/global_step2399/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2761daf2b7b3495246b19cf153163e9196305cdbb7009bcc566438f93ec17064
+size 43429616
last-checkpoint/global_step2399/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:871748468fe9fc78e000545dab697fda41763346312ee75b29a300747db57657
+size 43429616
last-checkpoint/global_step2399/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:264ed4f45d0a81458eca0848ba1c849cd1034e602ecce07f7629a6cf5712ab1e
+size 43429616
last-checkpoint/global_step2399/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5b9c23eef2a4c3db1e183f63421a1cd0bcd927687f7473fa1ffdaec4b0fe396
+size 637299
last-checkpoint/global_step2399/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9aaebcfa876d33eec282f09be9bf7e06877cddd316fe66ac4c9b8df8087de3e
+size 637171
last-checkpoint/global_step2399/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d1e27213c0a50dcec5a10490092aff333ead082a73f56d0f2799679c8c1a09a9
+size 637171
last-checkpoint/global_step2399/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:731e9d8fb7898d90d13c8d1d4ff5aeae78a7f020be87f4292f1693c89020d687
+size 637171
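
Note: the global_step2399 folder added above holds the DeepSpeed ZeRO partitions for this step (four per-rank bf16 optimizer-state shards plus four per-rank model-state shards). A hedged sketch of consolidating them with DeepSpeed's zero_to_fp32 helper; the checkpoint directory path is an assumption about where the repo is checked out:

# Hedged sketch (directory path assumed): DeepSpeed's zero_to_fp32 utility can
# merge the per-rank shards under global_step2399 into a single fp32 state dict.
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# When no tag is given, the tag (global_step2399) is read from last-checkpoint/latest.
state_dict = get_fp32_state_dict_from_zero_checkpoint("last-checkpoint")
print(f"consolidated {len(state_dict)} tensors")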
last-checkpoint/latest CHANGED
@@ -1 +1 @@
-global_step2349
+global_step2399
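
Note: latest is a one-line tag file that DeepSpeed reads to locate the newest step folder; this commit advances it from global_step2349 to global_step2399. A tiny sketch (path assumed):

tag = open("last-checkpoint/latest").read().strip()
print(tag)  # expected: global_step2399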
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:681faefe4cf303ca7f9bc3073b09b166da4f558d55bee3d5eee90ba5d83159bb
+oid sha256:6b8aff7a1897a7eaf48c78ea1f8115c061edfa2b6fa42280e2c1c58fe66b1f8a
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0fb250f1d137fb55cef85743a342508178b4fe3a20c6793c82e279730ea280b
+oid sha256:1d73a7524f07999ef35d5d9b107dcc1678eae2ada841644e1bd00ec0734368c2
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:34a0d7cfaa34f7e3738b4ef4989d693ed7864fed3b2a44ef1b6892fdcf026bb9
+oid sha256:149a2ed30e88bf94d622f8d7693f382286a49ac536a3f63efc50cab63f6b9f39
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2a82aabe23bc62e289ef7d075c79f353bbc81286ec0f8964eabda4209d630e10
+oid sha256:18f12c1b5aae2b7d4bb968649839fc7ff1ce6131508baad4b633693b04cee910
 size 15024
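
Note: the four rng_state_{0..3}.pth files are small per-rank RNG snapshots the trainer saves so a resumed run keeps the same data ordering and dropout pattern. A hedged sketch of restoring one of them; the key names ("python", "numpy", "cpu", "cuda") follow recent transformers releases and may differ by version:

# Hedged sketch (path and key names are assumptions): restore the rank-0 RNG snapshot.
import random
import numpy as np
import torch

rng = torch.load("last-checkpoint/rng_state_0.pth", weights_only=False)  # non-tensor objects need weights_only=False on newer torch
random.setstate(rng["python"])
np.random.set_state(rng["numpy"])
torch.random.set_rng_state(rng["cpu"])
if torch.cuda.is_available() and rng.get("cuda") is not None:
    torch.cuda.set_rng_state_all(rng["cuda"])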
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:612a67548dc5f963b4ad7edb95989ed33dc109c454a3a196b8ccaf386ef2286b
+oid sha256:6242510adeb4d59877ccb2f625a69ecf628b2bfccb966d6143251f5bc2806c24
 size 1064
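
Note: scheduler.pt holds the LR scheduler's state_dict (about 1 KB, matching the size above). A quick, hedged way to peek at it; the exact keys depend on the scheduler class used by the run:

# Hedged sketch (path assumed; keys vary by scheduler): inspect the saved LR scheduler state.
import torch

sched = torch.load("last-checkpoint/scheduler.pt", weights_only=False)
print(sched.get("last_epoch"), sched.get("_last_lr"))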
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.22014015913009644,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4/lora/sft/checkpoint-2250",
4
- "epoch": 1.210146793716199,
5
  "eval_steps": 50,
6
- "global_step": 2350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4190,11 +4190,100 @@
4190
  "eval_steps_per_second": 0.759,
4191
  "num_input_tokens_seen": 27479840,
4192
  "step": 2350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4193
  }
4194
  ],
4195
  "logging_steps": 5,
4196
  "max_steps": 3882,
4197
- "num_input_tokens_seen": 27479840,
4198
  "num_train_epochs": 2,
4199
  "save_steps": 50,
4200
  "stateful_callbacks": {
@@ -4209,7 +4298,7 @@
4209
  "attributes": {}
4210
  }
4211
  },
4212
- "total_flos": 1813240829706240.0,
4213
  "train_batch_size": 1,
4214
  "trial_name": null,
4215
  "trial_params": null
 
1
  {
2
  "best_metric": 0.22014015913009644,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4/lora/sft/checkpoint-2250",
4
+ "epoch": 1.2359000772598505,
5
  "eval_steps": 50,
6
+ "global_step": 2400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4190
  "eval_steps_per_second": 0.759,
4191
  "num_input_tokens_seen": 27479840,
4192
  "step": 2350
4193
+ },
4194
+ {
4195
+ "epoch": 1.212722122070564,
4196
+ "grad_norm": 9.144283973474371,
4197
+ "learning_rate": 3.6678798011291674e-05,
4198
+ "loss": 0.242,
4199
+ "num_input_tokens_seen": 27538344,
4200
+ "step": 2355
4201
+ },
4202
+ {
4203
+ "epoch": 1.2152974504249292,
4204
+ "grad_norm": 12.80063906167105,
4205
+ "learning_rate": 3.647360065838348e-05,
4206
+ "loss": 0.2451,
4207
+ "num_input_tokens_seen": 27596808,
4208
+ "step": 2360
4209
+ },
4210
+ {
4211
+ "epoch": 1.2178727787792945,
4212
+ "grad_norm": 3.44448459243246,
4213
+ "learning_rate": 3.6268648818777105e-05,
4214
+ "loss": 0.2237,
4215
+ "num_input_tokens_seen": 27655272,
4216
+ "step": 2365
4217
+ },
4218
+ {
4219
+ "epoch": 1.2204481071336595,
4220
+ "grad_norm": 14.301919876267943,
4221
+ "learning_rate": 3.606394621248709e-05,
4222
+ "loss": 0.2033,
4223
+ "num_input_tokens_seen": 27713784,
4224
+ "step": 2370
4225
+ },
4226
+ {
4227
+ "epoch": 1.2230234354880247,
4228
+ "grad_norm": 11.349699284999199,
4229
+ "learning_rate": 3.585949655500429e-05,
4230
+ "loss": 0.2741,
4231
+ "num_input_tokens_seen": 27772248,
4232
+ "step": 2375
4233
+ },
4234
+ {
4235
+ "epoch": 1.22559876384239,
4236
+ "grad_norm": 4.036313729279471,
4237
+ "learning_rate": 3.5655303557228335e-05,
4238
+ "loss": 0.2677,
4239
+ "num_input_tokens_seen": 27830704,
4240
+ "step": 2380
4241
+ },
4242
+ {
4243
+ "epoch": 1.228174092196755,
4244
+ "grad_norm": 5.042006341816937,
4245
+ "learning_rate": 3.545137092540035e-05,
4246
+ "loss": 0.2117,
4247
+ "num_input_tokens_seen": 27889160,
4248
+ "step": 2385
4249
+ },
4250
+ {
4251
+ "epoch": 1.2307494205511202,
4252
+ "grad_norm": 4.191635671104207,
4253
+ "learning_rate": 3.524770236103556e-05,
4254
+ "loss": 0.2216,
4255
+ "num_input_tokens_seen": 27947608,
4256
+ "step": 2390
4257
+ },
4258
+ {
4259
+ "epoch": 1.2333247489054855,
4260
+ "grad_norm": 13.530120109841187,
4261
+ "learning_rate": 3.504430156085629e-05,
4262
+ "loss": 0.28,
4263
+ "num_input_tokens_seen": 28006112,
4264
+ "step": 2395
4265
+ },
4266
+ {
4267
+ "epoch": 1.2359000772598505,
4268
+ "grad_norm": 23.252663154179324,
4269
+ "learning_rate": 3.484117221672465e-05,
4270
+ "loss": 0.2766,
4271
+ "num_input_tokens_seen": 28064552,
4272
+ "step": 2400
4273
+ },
4274
+ {
4275
+ "epoch": 1.2359000772598505,
4276
+ "eval_loss": 0.2361450344324112,
4277
+ "eval_runtime": 19.8674,
4278
+ "eval_samples_per_second": 3.02,
4279
+ "eval_steps_per_second": 0.755,
4280
+ "num_input_tokens_seen": 28064552,
4281
+ "step": 2400
4282
  }
4283
  ],
4284
  "logging_steps": 5,
4285
  "max_steps": 3882,
4286
+ "num_input_tokens_seen": 28064552,
4287
  "num_train_epochs": 2,
4288
  "save_steps": 50,
4289
  "stateful_callbacks": {
 
4298
  "attributes": {}
4299
  }
4300
  },
4301
+ "total_flos": 1851822579122176.0,
4302
  "train_batch_size": 1,
4303
  "trial_name": null,
4304
  "trial_params": null