ben81828 committed · Commit 1542de6 · verified · 1 parent: 0544051

Training in progress, step 2450, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e9d5ec3c39553916ddeed05ebc73c5ecc3358eb275ee803834c3cbefd37b2102
+ oid sha256:e815dfd4544e66028785a2440a38262f9d7e3b347ea1c255fd921b0ddf689205
  size 29034840
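
Each of these entries is a Git LFS pointer rather than the tensor data itself: the pointer records the sha256 oid and byte size of the object it stands for. Below is a minimal sketch for checking a downloaded file against its pointer; the local path and the use of Python's hashlib are illustrative choices, not part of this repository.

```python
# Minimal sketch: verify a locally downloaded LFS object against its pointer.
# The path below is illustrative; adjust it to wherever the checkpoint was pulled.
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file so large checkpoints need not fit in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

adapter = Path("last-checkpoint/adapter_model.safetensors")
expected_oid = "e815dfd4544e66028785a2440a38262f9d7e3b347ea1c255fd921b0ddf689205"
expected_size = 29034840

assert adapter.stat().st_size == expected_size, "size does not match the pointer"
assert sha256_of(adapter) == expected_oid, "sha256 does not match the pointer oid"
print("adapter_model.safetensors matches its LFS pointer")
```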
last-checkpoint/global_step2449/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:df913b606dc17e04f911185b2e90ac4c6d2f835c77acde131fac03606ba59b57
+ size 43429616
last-checkpoint/global_step2449/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:40bf06dc89bc81ae43dd663a01a967bb52ec7e73cc89537eae97f9ecc404c291
+ size 43429616
last-checkpoint/global_step2449/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ea547cde4ba847bc195d4961853f5087a4f24ce42d0ed6863652de7516898a41
+ size 43429616
last-checkpoint/global_step2449/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:854eb496733ec15bd9c4450b945123107d61526ca981135a7ae727e338ff167e
+ size 43429616
last-checkpoint/global_step2449/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a82c5df390d3257a5352379097918600d041610d4e5ba1e974b3e229bdd67a8c
+ size 637299
last-checkpoint/global_step2449/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:97da9743c1e36ab23675aa22a5b8411e2e0ed805854ceca8f076e43f31695414
+ size 637171
last-checkpoint/global_step2449/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a135547822e5461284280560c9a7414587b25a0dc806306e40ba6b78474ea00e
+ size 637171
last-checkpoint/global_step2449/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a161b670106cc9758bc9709b5057592d4ed400d53d5d1dc95f84e4f1e869d6ad
+ size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step2399
+ global_step2449
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6b8aff7a1897a7eaf48c78ea1f8115c061edfa2b6fa42280e2c1c58fe66b1f8a
+ oid sha256:a97c73c15a2a5b2de7dc426a700b2053aee43809425431c513cc5e3aab6c2107
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1d73a7524f07999ef35d5d9b107dcc1678eae2ada841644e1bd00ec0734368c2
+ oid sha256:1296b339c1b16ab7e14352a269004d20ede428aef748283fb0a6650d62f58129
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:149a2ed30e88bf94d622f8d7693f382286a49ac536a3f63efc50cab63f6b9f39
+ oid sha256:000b1637f5e73170f2337500a6a083df3a43d967d642b6c3a68f60deb6c3b960
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:18f12c1b5aae2b7d4bb968649839fc7ff1ce6131508baad4b633693b04cee910
+ oid sha256:b8bcb6e7802f6d888bc099642911087298cfb1adf7053a2d43a67192a53404ef
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6242510adeb4d59877ccb2f625a69ecf628b2bfccb966d6143251f5bc2806c24
+ oid sha256:ca831f953944cb39f4c82163568f885af5876e2e989b1167b35966dbf1db8e86
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": 0.22014015913009644,
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4/lora/sft/checkpoint-2250",
- "epoch": 1.2359000772598505,
+ "epoch": 1.2616533608035025,
  "eval_steps": 50,
- "global_step": 2400,
+ "global_step": 2450,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -4279,11 +4279,100 @@
  "eval_steps_per_second": 0.755,
  "num_input_tokens_seen": 28064552,
  "step": 2400
+ },
+ {
+ "epoch": 1.2384754056142158,
+ "grad_norm": 16.379510205091783,
+ "learning_rate": 3.463831801557577e-05,
+ "loss": 0.1933,
+ "num_input_tokens_seen": 28123016,
+ "step": 2405
+ },
+ {
+ "epoch": 1.241050733968581,
+ "grad_norm": 3.6631986381781556,
+ "learning_rate": 3.443574263935062e-05,
+ "loss": 0.2014,
+ "num_input_tokens_seen": 28181464,
+ "step": 2410
+ },
+ {
+ "epoch": 1.2436260623229463,
+ "grad_norm": 5.654529592966364,
+ "learning_rate": 3.42334497649294e-05,
+ "loss": 0.2383,
+ "num_input_tokens_seen": 28239920,
+ "step": 2415
+ },
+ {
+ "epoch": 1.2462013906773113,
+ "grad_norm": 14.005033039787135,
+ "learning_rate": 3.403144306406466e-05,
+ "loss": 0.2074,
+ "num_input_tokens_seen": 28298432,
+ "step": 2420
+ },
+ {
+ "epoch": 1.2487767190316765,
+ "grad_norm": 7.470199127973674,
+ "learning_rate": 3.382972620331475e-05,
+ "loss": 0.2202,
+ "num_input_tokens_seen": 28356904,
+ "step": 2425
+ },
+ {
+ "epoch": 1.2513520473860418,
+ "grad_norm": 3.9239650586287484,
+ "learning_rate": 3.362830284397716e-05,
+ "loss": 0.1756,
+ "num_input_tokens_seen": 28415384,
+ "step": 2430
+ },
+ {
+ "epoch": 1.2539273757404068,
+ "grad_norm": 32.67443269914418,
+ "learning_rate": 3.342717664202223e-05,
+ "loss": 0.2564,
+ "num_input_tokens_seen": 28473848,
+ "step": 2435
+ },
+ {
+ "epoch": 1.256502704094772,
+ "grad_norm": 8.79661830656602,
+ "learning_rate": 3.322635124802658e-05,
+ "loss": 0.242,
+ "num_input_tokens_seen": 28532312,
+ "step": 2440
+ },
+ {
+ "epoch": 1.2590780324491373,
+ "grad_norm": 9.41003020617472,
+ "learning_rate": 3.3025830307107035e-05,
+ "loss": 0.3455,
+ "num_input_tokens_seen": 28590784,
+ "step": 2445
+ },
+ {
+ "epoch": 1.2616533608035025,
+ "grad_norm": 23.884422578805488,
+ "learning_rate": 3.2825617458854376e-05,
+ "loss": 0.3076,
+ "num_input_tokens_seen": 28649256,
+ "step": 2450
+ },
+ {
+ "epoch": 1.2616533608035025,
+ "eval_loss": 0.2371988147497177,
+ "eval_runtime": 19.6997,
+ "eval_samples_per_second": 3.046,
+ "eval_steps_per_second": 0.761,
+ "num_input_tokens_seen": 28649256,
+ "step": 2450
  }
  ],
  "logging_steps": 5,
  "max_steps": 3882,
- "num_input_tokens_seen": 28064552,
+ "num_input_tokens_seen": 28649256,
  "num_train_epochs": 2,
  "save_steps": 50,
  "stateful_callbacks": {
@@ -4298,7 +4387,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1851822579122176.0,
+ "total_flos": 1890404691345408.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null