ben81828 committed (verified)
Commit 0141ba0
1 Parent(s): 736a639

Training in progress, step 1900, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:715ff69f50f84d39187c1f0fc35de081fab6e8a1a5b66268497bd57f97e40762
+ oid sha256:920101966caf9f63708128eacbc2e0f1c2ccd11ad0e5ce041ad3a77fc5419432
  size 29034840
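
Each entry above is a Git LFS pointer: the repository stores only the SHA-256 object id and the byte size, while the tensor data itself lives in LFS storage. Below is a minimal sketch of checking a locally cloned file against the new pointer; the local path layout and the idea of verifying by hand are illustrative assumptions, not part of this commit.

```python
# Sketch: verify a downloaded checkpoint file against its Git LFS pointer.
import hashlib
from pathlib import Path

def matches_lfs_pointer(file_path: str, expected_oid: str, expected_size: int) -> bool:
    """Return True if the file's SHA-256 digest and byte size match the LFS pointer."""
    path = Path(file_path)
    digest = hashlib.sha256(path.read_bytes()).hexdigest()
    return digest == expected_oid and path.stat().st_size == expected_size

# Values taken from the "+" lines of the diff above.
print(matches_lfs_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "920101966caf9f63708128eacbc2e0f1c2ccd11ad0e5ce041ad3a77fc5419432",
    29034840,
))
```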
last-checkpoint/global_step1900/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:10004449ffc6fbe7243c0d014ae1bf0c7a32216c00dcb88aaf05da089e831e9e
+ size 43429616
last-checkpoint/global_step1900/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c671b50f276fe4e6b55eb594721794eb78e97dfdcb1d79983a027142ec1ed050
+ size 43429616
last-checkpoint/global_step1900/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4bfe3408bf5e3534dc83c88bd98779cbd9b2aea263d697b0bad3c51905a8109d
+ size 43429616
last-checkpoint/global_step1900/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ae526fb8f169175aee500162ad359c6134710c3f5d82c9ba5253afe601763db8
+ size 43429616
last-checkpoint/global_step1900/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b73c80d353e86bc0831c17b078c24040fdd0c4e8ab08514a49f32c8d30bdd05f
+ size 637299
last-checkpoint/global_step1900/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b1bf77037608fc752c877fb66d69006a641580266e173e61625e593bf57be3c5
+ size 637171
last-checkpoint/global_step1900/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:231be1114d61d22226ebe1d59cbf8739f7815ea661c2bb2f951d7ed5587a3666
+ size 637171
last-checkpoint/global_step1900/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8d051a7de4c2283ae0bbc87046a8a57e26442a6c23b4a66eb9f4fe4aeb78f7dd
+ size 637171
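
The eight files added under last-checkpoint/global_step1900/ are DeepSpeed ZeRO shards: one bf16 optimizer-state file and one model-state file per data-parallel rank (four ranks in this run). Below is a minimal sketch of consolidating them into a single fp32 state dict with DeepSpeed's zero_to_fp32 helper, assuming DeepSpeed is installed; the output filename is hypothetical.

```python
# Sketch: merge the per-rank ZeRO shards for global_step1900 into one fp32 state dict.
# Assumes DeepSpeed is installed; the output path is hypothetical.
import torch
from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint

# "last-checkpoint" is the checkpoint directory in this repo; "global_step1900"
# is the tag recorded in last-checkpoint/latest (see the next diff).
state_dict = get_fp32_state_dict_from_zero_checkpoint(
    "last-checkpoint", tag="global_step1900"
)
torch.save(state_dict, "fp32_consolidated_state_dict.bin")
```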
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step1850
+ global_step1900
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7cc13c69d2b97530a3e18634e2f473678ea6880a6b34244c9c86a457f70137e1
+ oid sha256:585b8316bac42fce8431ad71be5542adc79fdd5ff1839401f8374f6ab8226086
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:fcfd90610e35a8f40098e1413d3e3a1658276c859045d3450f227cab64c9081a
+ oid sha256:cc4c7cc343f6d985da07523b86ffd7b02d0b8ae40ff925936c48aff8a7385f39
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:663ae184d05dd950d27e1f419e0306ca3b141b9d7f075a80804cada62f64a363
+ oid sha256:b364386e5b3618cb74f99527a8e31fa3c7d8d93018d2d0cf0b819ae4c3c1794b
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ec4f0a72927b3837a0e73cb5612acf4318c503f3ba2108b3925f750332a49b60
+ oid sha256:ed71fe0f2fd47c414d9fd305b847e65b2f95946543d7b867318fdc05f761e6bc
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:657e1e43dbcdd82c1b5b25483acdceb69febc0e5b31b87d49bda24ccdcfd4221
+ oid sha256:ee7d3b553d79cd5d86dc19d51dc31e4f97cde4fa9e0ae7f8da298e252f169e81
  size 1064
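
The rng_state_*.pth files snapshot each rank's random-number-generator state, and scheduler.pt stores the learning-rate scheduler, which together let training resume exactly at step 1900. Below is a small sketch of inspecting them locally; the keys are printed rather than assumed, since the exact layout depends on the transformers/DeepSpeed versions used for training.

```python
# Sketch: peek at the rank-0 RNG snapshot and the LR scheduler state.
import torch

rng = torch.load("last-checkpoint/rng_state_0.pth", weights_only=False)
sched = torch.load("last-checkpoint/scheduler.pt", weights_only=False)
print("rng_state_0 keys:", sorted(rng.keys()))
print("scheduler keys:", sorted(sched.keys()))
```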
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
    "best_metric": 0.2935050129890442,
    "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4/lora/sft/checkpoint-1850",
-   "epoch": 0.9528714911151172,
+   "epoch": 0.978624774658769,
    "eval_steps": 50,
-   "global_step": 1850,
+   "global_step": 1900,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -3300,11 +3300,100 @@
        "eval_steps_per_second": 0.77,
        "num_input_tokens_seen": 21637848,
        "step": 1850
+     },
+     {
+       "epoch": 0.9554468194694824,
+       "grad_norm": 11.793922029031792,
+       "learning_rate": 5.778595395022226e-05,
+       "loss": 0.4205,
+       "num_input_tokens_seen": 21696328,
+       "step": 1855
+     },
+     {
+       "epoch": 0.9580221478238475,
+       "grad_norm": 7.242120499330675,
+       "learning_rate": 5.757546423837314e-05,
+       "loss": 0.3075,
+       "num_input_tokens_seen": 21754816,
+       "step": 1860
+     },
+     {
+       "epoch": 0.9605974761782127,
+       "grad_norm": 15.811484589504676,
+       "learning_rate": 5.736483702672155e-05,
+       "loss": 0.2604,
+       "num_input_tokens_seen": 21813296,
+       "step": 1865
+     },
+     {
+       "epoch": 0.9631728045325779,
+       "grad_norm": 13.712784450465344,
+       "learning_rate": 5.7154076138293914e-05,
+       "loss": 0.342,
+       "num_input_tokens_seen": 21871800,
+       "step": 1870
+     },
+     {
+       "epoch": 0.9657481328869431,
+       "grad_norm": 7.941289555505942,
+       "learning_rate": 5.694318539854297e-05,
+       "loss": 0.411,
+       "num_input_tokens_seen": 21930272,
+       "step": 1875
+     },
+     {
+       "epoch": 0.9683234612413083,
+       "grad_norm": 13.422362176081661,
+       "learning_rate": 5.673216863527836e-05,
+       "loss": 0.2606,
+       "num_input_tokens_seen": 21988736,
+       "step": 1880
+     },
+     {
+       "epoch": 0.9708987895956734,
+       "grad_norm": 3.5902749610502247,
+       "learning_rate": 5.652102967859715e-05,
+       "loss": 0.2647,
+       "num_input_tokens_seen": 22047200,
+       "step": 1885
+     },
+     {
+       "epoch": 0.9734741179500386,
+       "grad_norm": 10.785857611570844,
+       "learning_rate": 5.6309772360814295e-05,
+       "loss": 0.2961,
+       "num_input_tokens_seen": 22105664,
+       "step": 1890
+     },
+     {
+       "epoch": 0.9760494463044038,
+       "grad_norm": 12.758145270494738,
+       "learning_rate": 5.6098400516393065e-05,
+       "loss": 0.2776,
+       "num_input_tokens_seen": 22164184,
+       "step": 1895
+     },
+     {
+       "epoch": 0.978624774658769,
+       "grad_norm": 9.907554223635353,
+       "learning_rate": 5.5886917981875485e-05,
+       "loss": 0.3781,
+       "num_input_tokens_seen": 22222632,
+       "step": 1900
+     },
+     {
+       "epoch": 0.978624774658769,
+       "eval_loss": 0.297338604927063,
+       "eval_runtime": 19.4745,
+       "eval_samples_per_second": 3.081,
+       "eval_steps_per_second": 0.77,
+       "num_input_tokens_seen": 22222632,
+       "step": 1900
      }
    ],
    "logging_steps": 5,
    "max_steps": 3882,
-   "num_input_tokens_seen": 21637848,
+   "num_input_tokens_seen": 22222632,
    "num_train_epochs": 2,
    "save_steps": 50,
    "stateful_callbacks": {
@@ -3319,7 +3408,7 @@
        "attributes": {}
      }
    },
-   "total_flos": 1427712275382272.0,
+   "total_flos": 1466299246968832.0,
    "train_batch_size": 1,
    "trial_name": null,
    "trial_params": null