ben81828 commited on
Commit
6b24dcf
·
verified ·
1 Parent(s): c791c01

Training in progress, step 1250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c47d2eda2744bde9c36de097c0d54ac6bcafb93bcd134cf6cefce20795772fe5
3
  size 29034840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:176fb1c1416e7836affe6e751cb01891a9a4235ffa9e418ece291228e7c61b72
3
  size 29034840
last-checkpoint/global_step1250/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3422a6a01821a496f9f0cfac38a8b398c68ac0e89ceb299fe65df44137bb4969
3
+ size 43429616
last-checkpoint/global_step1250/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9963f3643d9761b3578e0c0752901b5ae3242115e20fed7e7a35ba6355f7c12a
3
+ size 43429616
last-checkpoint/global_step1250/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b594c71893dff809c032ef89e186949607d9c2a1f3f9f8c81a2016cb03a5f057
3
+ size 43429616
last-checkpoint/global_step1250/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07d74c60ad2b557694c05090e26d001467671390650d44dffb0f1194fea9b14e
3
+ size 43429616
last-checkpoint/global_step1250/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ec0f2cde302545248de950463724f6d392acd94dddf06b2baba0a3d8f91b0e0
3
+ size 637299
last-checkpoint/global_step1250/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e30a5b6d29417f61fce028606e2a43dda69feda25ab401bf98a939ad68d0a488
3
+ size 637171
last-checkpoint/global_step1250/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55d5732bdcd7f622a6122c509cb64b5a03edeaa513ee805d437e1045732cd1d7
3
+ size 637171
last-checkpoint/global_step1250/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac58ba9b33d13c7004129934ac2b499707908f73809a09276a3945da3fd62d36
3
+ size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step1200
 
1
+ global_step1250
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a209a0c0025f9ce8e2beeba50c1f0828d5c34a2482310fcd0bf5fc24c2c67be2
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15bbef9a4c878a05b4f189e9f77701a153dc9faf093499714094cb36ac0ca030
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a67fb929b8c51f9b1c6ff9f11366e57e55128a1d36df85a9d37a008b49017a75
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a7122760cfda3f3e13eac4f7e56d09b0fd3beac4bd49b2f0e58da9519469ff9
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b9ef3b0c0978d0b611f4257c939f1c2c6f07e6227bfea6675532d285b0b64a7
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71edf044d4ea96e62ffecab515d42b58ae56cd64abd0092a897468e0e0d7c10f
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0183d14c8ed52ee533139532e9bcf7bc34ec297a064845b35741cb501d92675f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef5ca30351deca9d83675a183b2186ef346093f56e0d5a6dacdf62ef921f12c2
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e37e09d4d7db5550a63706e3b322910f912c40483fec5cf5aec0fc0a164e43d1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ce8d60a774ed9abbbc615b4d3c2824ac579f9fc2a9f4863bd96b28d7ae48e71
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.6184359192848206,
3
- "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4/lora/sft/checkpoint-1200",
4
- "epoch": 0.6180788050476436,
5
  "eval_steps": 50,
6
- "global_step": 1200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2143,11 +2143,100 @@
2143
  "eval_steps_per_second": 0.777,
2144
  "num_input_tokens_seen": 14035544,
2145
  "step": 1200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2146
  }
2147
  ],
2148
  "logging_steps": 5,
2149
  "max_steps": 3882,
2150
- "num_input_tokens_seen": 14035544,
2151
  "num_train_epochs": 2,
2152
  "save_steps": 50,
2153
  "stateful_callbacks": {
@@ -2162,7 +2251,7 @@
2162
  "attributes": {}
2163
  }
2164
  },
2165
- "total_flos": 926073204768768.0,
2166
  "train_batch_size": 1,
2167
  "trial_name": null,
2168
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.5496931672096252,
3
+ "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4/lora/sft/checkpoint-1250",
4
+ "epoch": 0.6438320885912954,
5
  "eval_steps": 50,
6
+ "global_step": 1250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2143
  "eval_steps_per_second": 0.777,
2144
  "num_input_tokens_seen": 14035544,
2145
  "step": 1200
2146
+ },
2147
+ {
2148
+ "epoch": 0.6206541334020087,
2149
+ "grad_norm": 5.625941133087891,
2150
+ "learning_rate": 8.259937513218066e-05,
2151
+ "loss": 0.5109,
2152
+ "num_input_tokens_seen": 14094024,
2153
+ "step": 1205
2154
+ },
2155
+ {
2156
+ "epoch": 0.623229461756374,
2157
+ "grad_norm": 11.892235969186327,
2158
+ "learning_rate": 8.243756302961898e-05,
2159
+ "loss": 0.4738,
2160
+ "num_input_tokens_seen": 14152504,
2161
+ "step": 1210
2162
+ },
2163
+ {
2164
+ "epoch": 0.6258047901107391,
2165
+ "grad_norm": 45.17960159223106,
2166
+ "learning_rate": 8.227516216333679e-05,
2167
+ "loss": 0.5615,
2168
+ "num_input_tokens_seen": 14210992,
2169
+ "step": 1215
2170
+ },
2171
+ {
2172
+ "epoch": 0.6283801184651043,
2173
+ "grad_norm": 14.930236962628644,
2174
+ "learning_rate": 8.211217548101973e-05,
2175
+ "loss": 0.5584,
2176
+ "num_input_tokens_seen": 14269488,
2177
+ "step": 1220
2178
+ },
2179
+ {
2180
+ "epoch": 0.6309554468194695,
2181
+ "grad_norm": 49.91459221869246,
2182
+ "learning_rate": 8.194860594098635e-05,
2183
+ "loss": 0.4856,
2184
+ "num_input_tokens_seen": 14327968,
2185
+ "step": 1225
2186
+ },
2187
+ {
2188
+ "epoch": 0.6335307751738347,
2189
+ "grad_norm": 14.899444451092219,
2190
+ "learning_rate": 8.17844565121345e-05,
2191
+ "loss": 0.5378,
2192
+ "num_input_tokens_seen": 14386448,
2193
+ "step": 1230
2194
+ },
2195
+ {
2196
+ "epoch": 0.6361061035281999,
2197
+ "grad_norm": 10.76781481162281,
2198
+ "learning_rate": 8.161973017388744e-05,
2199
+ "loss": 0.4484,
2200
+ "num_input_tokens_seen": 14444912,
2201
+ "step": 1235
2202
+ },
2203
+ {
2204
+ "epoch": 0.638681431882565,
2205
+ "grad_norm": 11.97619546639196,
2206
+ "learning_rate": 8.145442991613963e-05,
2207
+ "loss": 0.4772,
2208
+ "num_input_tokens_seen": 14503392,
2209
+ "step": 1240
2210
+ },
2211
+ {
2212
+ "epoch": 0.6412567602369302,
2213
+ "grad_norm": 12.878458794693833,
2214
+ "learning_rate": 8.128855873920265e-05,
2215
+ "loss": 0.5807,
2216
+ "num_input_tokens_seen": 14561872,
2217
+ "step": 1245
2218
+ },
2219
+ {
2220
+ "epoch": 0.6438320885912954,
2221
+ "grad_norm": 5.57738881271864,
2222
+ "learning_rate": 8.112211965375059e-05,
2223
+ "loss": 0.5268,
2224
+ "num_input_tokens_seen": 14620336,
2225
+ "step": 1250
2226
+ },
2227
+ {
2228
+ "epoch": 0.6438320885912954,
2229
+ "eval_loss": 0.5496931672096252,
2230
+ "eval_runtime": 19.4472,
2231
+ "eval_samples_per_second": 3.085,
2232
+ "eval_steps_per_second": 0.771,
2233
+ "num_input_tokens_seen": 14620336,
2234
+ "step": 1250
2235
  }
2236
  ],
2237
  "logging_steps": 5,
2238
  "max_steps": 3882,
2239
+ "num_input_tokens_seen": 14620336,
2240
  "num_train_epochs": 2,
2241
  "save_steps": 50,
2242
  "stateful_callbacks": {
 
2251
  "attributes": {}
2252
  }
2253
  },
2254
+ "total_flos": 964658239897600.0,
2255
  "train_batch_size": 1,
2256
  "trial_name": null,
2257
  "trial_params": null