ben81828 committed (verified)
Commit 7f2672b · Parent(s): 6dcda93

Training in progress, step 2000, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:4ac734ac2c76973c39f2d42217ce6a151066fb0c8286e22fc868a6924722b410
+ oid sha256:803c4a6d1224f4cccaa865d5f234c98fc604663a56f02aca7b212731cc155316
  size 29034840
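
The pointer swap above only replaces the Git LFS metadata for the retrained adapter: the blob itself is addressed by its sha256 oid and byte size. As a minimal sketch (not part of this commit; the file paths are hypothetical), a downloaded blob can be checked against its pointer like this:

    import hashlib
    import os

    def parse_lfs_pointer(pointer_path):
        """Read the three-line LFS pointer (version / oid / size) into a dict."""
        fields = {}
        with open(pointer_path) as f:
            for line in f:
                key, _, value = line.strip().partition(" ")
                fields[key] = value
        return fields

    def blob_matches_pointer(pointer_path, blob_path):
        """Compare a downloaded blob's sha256 and size with its LFS pointer."""
        fields = parse_lfs_pointer(pointer_path)
        expected_oid = fields["oid"].split(":", 1)[1]  # drop the "sha256:" prefix
        with open(blob_path, "rb") as f:
            actual_oid = hashlib.sha256(f.read()).hexdigest()
        return actual_oid == expected_oid and os.path.getsize(blob_path) == int(fields["size"])
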
last-checkpoint/global_step1999/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2da4a74c99ff2a3a05e231f3bc04ac37ee61717dee72b049654400a5944b33a4
+ size 43429616
last-checkpoint/global_step1999/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d24edf0c016999f7fb5f14dc6e33d92be7af90f81655956eef2e58fe28049c96
+ size 43429616
last-checkpoint/global_step1999/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:30b247b7741d4a2444f0cd782ba98252ff6a6f05291fa7fbd6b9d44adaaba1e9
+ size 43429616
last-checkpoint/global_step1999/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9d831ecb5039331bbe89b8d322a1d72f4e2a7226c4de155b413cf864ee17c3b5
+ size 43429616
last-checkpoint/global_step1999/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6f19492326c04faae8146851321353d43cb491f4742ef405b7a5c349ec89bfcf
+ size 637299
last-checkpoint/global_step1999/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4637523a1adc180146788f447c049b5340fb1af93d2025543f54b5d714b76333
+ size 637171
last-checkpoint/global_step1999/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d38d8975194abd2f50f2f772baa3160dd82309ea87218fa2cb9c2ce5b7667ed4
+ size 637171
last-checkpoint/global_step1999/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f99c2a9df149ad4addcbb0dc9882b8d52cbfbd130ac34f4878ca8b6042b85e75
+ size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
- global_step1949
+ global_step1999
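
The single-line `latest` file is what DeepSpeed consults to decide which global_stepNNNN directory to restore optimizer and model shards from on resume; this commit moves it from global_step1949 to global_step1999. A minimal sketch, assuming the checkpoint has been downloaded to a local last-checkpoint/ directory (hypothetical path):

    from pathlib import Path

    ckpt = Path("last-checkpoint")                 # hypothetical local copy of this folder
    tag = (ckpt / "latest").read_text().strip()    # -> "global_step1999" after this commit
    optim_shards = sorted((ckpt / tag).glob("bf16_zero_pp_rank_*_optim_states.pt"))
    print(tag, len(optim_shards))                  # four ZeRO optimizer shards, one per rank
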
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:76c7a1c4ad326ceb79f3afbc6d47975b14a4cb17c9f8fb7483b37b11ee134aac
+ oid sha256:f4a695de1db3382235d3f8ae213672491aa2fdc3ba3be96403a089077ad3c2bf
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:364e129a29ed2420756ce71165221396b3418a310a60e2d96548d62cc7590232
+ oid sha256:c2025b170fa1d4693537c2d73f89a6495c58940d033678742a74810c0154a6a7
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a89d078b36f7a96070fb2b399b9fe9bc0196d5110cb8255158e3354d836845a5
+ oid sha256:0865c4d7d921b23a22c91c2f3b2c6cca03dae0eb27c43dee575c9602605c94d6
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:10672d33daa64ff34468d947c3c30b17fe906ae6c3d9ace60ba1c3e119c8efa4
+ oid sha256:5f4ec19d9df4417359523e8cc4d27875614c1021ebcc6391b27632aa7897b7ea
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1dceb4d7c8fe3edb9c9b341578a50f3bbedf01d3139eb1e8d1c37f0a6675d5e3
+ oid sha256:657310b2e177154a68163e400108db006e4fa6c6598cc6aead4040fd6bb39767
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
    "best_metric": 0.2472737729549408,
    "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4/lora/sft/checkpoint-1950",
-   "epoch": 1.0041205253669843,
+   "epoch": 1.029873808910636,
    "eval_steps": 50,
-   "global_step": 1950,
+   "global_step": 2000,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
@@ -3478,11 +3478,100 @@
      "eval_steps_per_second": 0.77,
      "num_input_tokens_seen": 22801512,
      "step": 1950
+     },
+     {
+       "epoch": 1.0066958537213495,
+       "grad_norm": 9.441611539164802,
+       "learning_rate": 5.355440367836396e-05,
+       "loss": 0.278,
+       "num_input_tokens_seen": 22859952,
+       "step": 1955
+     },
+     {
+       "epoch": 1.0092711820757148,
+       "grad_norm": 13.17108131671136,
+       "learning_rate": 5.334189277447138e-05,
+       "loss": 0.3433,
+       "num_input_tokens_seen": 22918440,
+       "step": 1960
+     },
+     {
+       "epoch": 1.0118465104300798,
+       "grad_norm": 6.702666612997951,
+       "learning_rate": 5.312932121296339e-05,
+       "loss": 0.2929,
+       "num_input_tokens_seen": 22976944,
+       "step": 1965
+     },
+     {
+       "epoch": 1.014421838784445,
+       "grad_norm": 8.293945652706178,
+       "learning_rate": 5.291669285215766e-05,
+       "loss": 0.2804,
+       "num_input_tokens_seen": 23035408,
+       "step": 1970
+     },
+     {
+       "epoch": 1.0169971671388103,
+       "grad_norm": 9.067903045446336,
+       "learning_rate": 5.270401155140284e-05,
+       "loss": 0.2583,
+       "num_input_tokens_seen": 23093912,
+       "step": 1975
+     },
+     {
+       "epoch": 1.0195724954931753,
+       "grad_norm": 6.0243656792037905,
+       "learning_rate": 5.2491281171008476e-05,
+       "loss": 0.3232,
+       "num_input_tokens_seen": 23152344,
+       "step": 1980
+     },
+     {
+       "epoch": 1.0221478238475405,
+       "grad_norm": 11.796220731230777,
+       "learning_rate": 5.227850557217494e-05,
+       "loss": 0.2935,
+       "num_input_tokens_seen": 23210800,
+       "step": 1985
+     },
+     {
+       "epoch": 1.0247231522019058,
+       "grad_norm": 12.531746818475774,
+       "learning_rate": 5.2065688616923314e-05,
+       "loss": 0.2677,
+       "num_input_tokens_seen": 23269304,
+       "step": 1990
+     },
+     {
+       "epoch": 1.0272984805562708,
+       "grad_norm": 12.674176291351744,
+       "learning_rate": 5.185283416802539e-05,
+       "loss": 0.2697,
+       "num_input_tokens_seen": 23327800,
+       "step": 1995
+     },
+     {
+       "epoch": 1.029873808910636,
+       "grad_norm": 9.205507664261733,
+       "learning_rate": 5.1639946088933444e-05,
+       "loss": 0.2272,
+       "num_input_tokens_seen": 23386232,
+       "step": 2000
+     },
+     {
+       "epoch": 1.029873808910636,
+       "eval_loss": 0.2834003269672394,
+       "eval_runtime": 19.4052,
+       "eval_samples_per_second": 3.092,
+       "eval_steps_per_second": 0.773,
+       "num_input_tokens_seen": 23386232,
+       "step": 2000
      }
    ],
    "logging_steps": 5,
    "max_steps": 3882,
-   "num_input_tokens_seen": 22801512,
+   "num_input_tokens_seen": 23386232,
    "num_train_epochs": 2,
    "save_steps": 50,
    "stateful_callbacks": {
@@ -3497,7 +3586,7 @@
        "attributes": {}
      }
    },
-   "total_flos": 1504535646044160.0,
+   "total_flos": 1543118919303168.0,
    "train_batch_size": 1,
    "trial_name": null,
    "trial_params": null