ben81828 commited on
Commit
9cf90e5
·
verified ·
1 Parent(s): 3e2ed10

Training in progress, step 2850, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f209712b49d72a0a61d59f7b8f5e51be948aa2ecb02420dc257c2b1ba5b38a8
3
  size 29034840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73c7f2377d2df91cf17de2733b3a5bbe82d66442455824ef9aa239771fcb5c26
3
  size 29034840
last-checkpoint/global_step2849/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45ab5885128ab15acfeda63788268d09370a0f0245104f50cb23ab0e1debf5d1
3
+ size 43429616
last-checkpoint/global_step2849/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4cc72c29b8a71621fc9556a48734394c203d607b44ea67f32726da75dcf76d54
3
+ size 43429616
last-checkpoint/global_step2849/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a1c10bdfdd75e7e8018958e4aff8bde9e76b0bde1ebb0be5da4e566b9eccbb9
3
+ size 43429616
last-checkpoint/global_step2849/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a183a41b7c8207214291440122475dc391e27b810782729e2ee71e10f4e0d37
3
+ size 43429616
last-checkpoint/global_step2849/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7180ca4205568132765e6d2ead963d258ddb856ab3eae9d1bfcb22efb6872c96
3
+ size 637299
last-checkpoint/global_step2849/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae55ddd6b4e14375cd0c4d3f967ab0daccde3cb162e3e12251839e83e0195956
3
+ size 637171
last-checkpoint/global_step2849/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ee9490ec9c9853920e4c9155fb808160a417e30fd2bff714d8b01355280bd6f
3
+ size 637171
last-checkpoint/global_step2849/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e82babdf84d259edd42d8f571b3a5cbfeeb5c920141b498673efd54dfbd4c85d
3
+ size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2799
 
1
+ global_step2849
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff8dba2341c0517760edfde50521977f02a5bd982ffd3bc03de6109439c4f478
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce92cea831a04716b4b472f1dad1cc986b2021dee9aac057217f5d455b27ec42
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2bf831df9fbade9ac2a8db79798bc2a7b1afb85a78a6e463ec7a7db4acc0f8e
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cddb73bbdf0f6f6a2c3182d70f7ad5d587353b164c08dd4f383b940d6b61e4e
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8602ff0a0fa366d46b61c0ef2b23ce468387898cf2bc1027e5450de73ddf647f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b24b508e466beb446d37377d2a04757d3bc2b4230de3ac56b25a65d7753a74c1
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bb51d675cf23603b1b765cd645f53d6b66ddb104d56d48674e9c798e086f696
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4c6a18a7de8b25b21673ba2ff7efbaaae00ec8c453c7975b467c1df87b87022
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2248ae5cd08b24968d749ba4f37513ee0a4cfb5059768755ed9ef6b0b1fd810
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06c7a8724990bbfa42e474ca2bea837a85b83fc6dd9afb66285c6f4108456bdd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.18780523538589478,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4/lora/sft/checkpoint-2650",
4
- "epoch": 1.4419263456090652,
5
  "eval_steps": 50,
6
- "global_step": 2800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4991,11 +4991,100 @@
4991
  "eval_steps_per_second": 0.772,
4992
  "num_input_tokens_seen": 32743032,
4993
  "step": 2800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4994
  }
4995
  ],
4996
  "logging_steps": 5,
4997
  "max_steps": 3882,
4998
- "num_input_tokens_seen": 32743032,
4999
  "num_train_epochs": 2,
5000
  "save_steps": 50,
5001
  "stateful_callbacks": {
@@ -5010,7 +5099,7 @@
5010
  "attributes": {}
5011
  }
5012
  },
5013
- "total_flos": 2160533754609664.0,
5014
  "train_batch_size": 1,
5015
  "trial_name": null,
5016
  "trial_params": null
 
1
  {
2
  "best_metric": 0.18780523538589478,
3
  "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4/lora/sft/checkpoint-2650",
4
+ "epoch": 1.467679629152717,
5
  "eval_steps": 50,
6
+ "global_step": 2850,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4991
  "eval_steps_per_second": 0.772,
4992
  "num_input_tokens_seen": 32743032,
4993
  "step": 2800
4994
+ },
4995
+ {
4996
+ "epoch": 1.4445016739634302,
4997
+ "grad_norm": 5.47081802170478,
4998
+ "learning_rate": 1.9616882291322043e-05,
4999
+ "loss": 0.2128,
5000
+ "num_input_tokens_seen": 32801504,
5001
+ "step": 2805
5002
+ },
5003
+ {
5004
+ "epoch": 1.4470770023177955,
5005
+ "grad_norm": 8.153473173608221,
5006
+ "learning_rate": 1.9447980435272982e-05,
5007
+ "loss": 0.226,
5008
+ "num_input_tokens_seen": 32860032,
5009
+ "step": 2810
5010
+ },
5011
+ {
5012
+ "epoch": 1.4496523306721607,
5013
+ "grad_norm": 3.2012899866356275,
5014
+ "learning_rate": 1.9279633119054524e-05,
5015
+ "loss": 0.1945,
5016
+ "num_input_tokens_seen": 32918472,
5017
+ "step": 2815
5018
+ },
5019
+ {
5020
+ "epoch": 1.452227659026526,
5021
+ "grad_norm": 3.275737115662877,
5022
+ "learning_rate": 1.9111843398284412e-05,
5023
+ "loss": 0.1593,
5024
+ "num_input_tokens_seen": 32976944,
5025
+ "step": 2820
5026
+ },
5027
+ {
5028
+ "epoch": 1.4548029873808912,
5029
+ "grad_norm": 5.6817295869650755,
5030
+ "learning_rate": 1.8944614318459604e-05,
5031
+ "loss": 0.2154,
5032
+ "num_input_tokens_seen": 33035424,
5033
+ "step": 2825
5034
+ },
5035
+ {
5036
+ "epoch": 1.4573783157352562,
5037
+ "grad_norm": 4.248487908153702,
5038
+ "learning_rate": 1.8777948914901066e-05,
5039
+ "loss": 0.2266,
5040
+ "num_input_tokens_seen": 33093880,
5041
+ "step": 2830
5042
+ },
5043
+ {
5044
+ "epoch": 1.4599536440896215,
5045
+ "grad_norm": 4.65833753095402,
5046
+ "learning_rate": 1.8611850212698678e-05,
5047
+ "loss": 0.1948,
5048
+ "num_input_tokens_seen": 33152328,
5049
+ "step": 2835
5050
+ },
5051
+ {
5052
+ "epoch": 1.4625289724439865,
5053
+ "grad_norm": 6.9576190671694205,
5054
+ "learning_rate": 1.84463212266563e-05,
5055
+ "loss": 0.2159,
5056
+ "num_input_tokens_seen": 33210816,
5057
+ "step": 2840
5058
+ },
5059
+ {
5060
+ "epoch": 1.4651043007983517,
5061
+ "grad_norm": 28.44341760505862,
5062
+ "learning_rate": 1.8281364961237013e-05,
5063
+ "loss": 0.2326,
5064
+ "num_input_tokens_seen": 33269320,
5065
+ "step": 2845
5066
+ },
5067
+ {
5068
+ "epoch": 1.467679629152717,
5069
+ "grad_norm": 12.408239979973144,
5070
+ "learning_rate": 1.8116984410508696e-05,
5071
+ "loss": 0.1912,
5072
+ "num_input_tokens_seen": 33327720,
5073
+ "step": 2850
5074
+ },
5075
+ {
5076
+ "epoch": 1.467679629152717,
5077
+ "eval_loss": 0.2647402584552765,
5078
+ "eval_runtime": 19.6149,
5079
+ "eval_samples_per_second": 3.059,
5080
+ "eval_steps_per_second": 0.765,
5081
+ "num_input_tokens_seen": 33327720,
5082
+ "step": 2850
5083
  }
5084
  ],
5085
  "logging_steps": 5,
5086
  "max_steps": 3882,
5087
+ "num_input_tokens_seen": 33327720,
5088
  "num_train_epochs": 2,
5089
  "save_steps": 50,
5090
  "stateful_callbacks": {
 
5099
  "attributes": {}
5100
  }
5101
  },
5102
+ "total_flos": 2199114391224320.0,
5103
  "train_batch_size": 1,
5104
  "trial_name": null,
5105
  "trial_params": null