ben81828 commited on
Commit
387859e
·
verified ·
1 Parent(s): ae9b31c

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82e427cc7e624394004d64620be182b50bd10e2a67a34cf622de25a3f138b7e0
3
  size 29034840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:168cc62ce0f9bc823e9f05cfca486c4f8b12cfdb3adf1b70687137fa417f7b65
3
  size 29034840
last-checkpoint/global_step600/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f9fe0148996918ae64e3daf55de125922b1aac01d65237cb6d580396eb5edb5
3
+ size 43429616
last-checkpoint/global_step600/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:981a666d180eb07860966c360bb3546e8d109fe8e734a3cc93556dd84b087068
3
+ size 43429616
last-checkpoint/global_step600/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:206025f90c8c7ddfc483b10a17cdbd7607d47453ab345d0d2adba2482f60148c
3
+ size 43429616
last-checkpoint/global_step600/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36b63312a138941da8cb52882eb69e743ebac4a3d9a0f0e5c118e50224999076
3
+ size 43429616
last-checkpoint/global_step600/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11e8027e9407df0ac39e2b3b0f9b391ceffcc0365bf7b524d551f5dbe3c76e79
3
+ size 637299
last-checkpoint/global_step600/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c20dbc40aa3a9738159e35e517f2b9b468c6b1bee8cb810efadc972e0821f0fc
3
+ size 637171
last-checkpoint/global_step600/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:585439023d752dde2525d7bdccd458f3fb79989825110ba63e0cce828093c299
3
+ size 637171
last-checkpoint/global_step600/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e197c45213ad104a71f307e0f73afc54979e8843c65bd3e2a9fc618c43b98f06
3
+ size 637171
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step550
 
1
+ global_step600
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae78313eb528c8d3695eebaf4de3539bd0a0bc6ee18c66af1ee183442f1758a0
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a81e3916b1392c4c49afb171dee5415c15f5a5a5af8749b28195fcfa0596699c
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b38031f60d9e88601d369ef46bcdcf2b5b03f2cb4ba93853bcb2328df7ebb7c
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a781038dd714b87b8adb1aac8dbc8217ceb607428a992133954ad522365236e
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f58092375c93d237cd0e3149aecfbf83e2acdae46279e07a32920d01cb507e64
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9446c3db15f382a5546f13622787fc99392a5e0bc8a9ca2da1838de7ab621a37
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83cd4bbff9962da7ec6787fcea8d65df7096917f9a5902e249ba7aee8887fe5f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f11e7a6b3faa884fc23044e3772ff9dd72c257f02e121665061e2a03d518bd9
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e969c9b0ecef9c1209a6397ff63db034af1cc51341323dc3dc14016347fe871
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c97a1e2f2542883d462e18c679fb75515cd51cbf96416fbbbdc7ed7d003e43a9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.7909801602363586,
3
- "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4/lora/sft/checkpoint-550",
4
- "epoch": 0.28328611898017,
5
  "eval_steps": 50,
6
- "global_step": 550,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -986,11 +986,100 @@
986
  "eval_steps_per_second": 0.755,
987
  "num_input_tokens_seen": 6432936,
988
  "step": 550
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
989
  }
990
  ],
991
  "logging_steps": 5,
992
  "max_steps": 3882,
993
- "num_input_tokens_seen": 6432936,
994
  "num_train_epochs": 2,
995
  "save_steps": 50,
996
  "stateful_callbacks": {
@@ -1005,7 +1094,7 @@
1005
  "attributes": {}
1006
  }
1007
  },
1008
- "total_flos": 424412082339840.0,
1009
  "train_batch_size": 1,
1010
  "trial_name": null,
1011
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.787663459777832,
3
+ "best_model_checkpoint": "saves/CADICA_qwenvl_stenosis_classily_scale4/lora/sft/checkpoint-600",
4
+ "epoch": 0.3090394025238218,
5
  "eval_steps": 50,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
986
  "eval_steps_per_second": 0.755,
987
  "num_input_tokens_seen": 6432936,
988
  "step": 550
989
+ },
990
+ {
991
+ "epoch": 0.28586144733453517,
992
+ "grad_norm": 3.529163852540611,
993
+ "learning_rate": 9.7666055857936e-05,
994
+ "loss": 0.8264,
995
+ "num_input_tokens_seen": 6491400,
996
+ "step": 555
997
+ },
998
+ {
999
+ "epoch": 0.28843677568890036,
1000
+ "grad_norm": 4.044590312854015,
1001
+ "learning_rate": 9.760130083696595e-05,
1002
+ "loss": 0.8456,
1003
+ "num_input_tokens_seen": 6549872,
1004
+ "step": 560
1005
+ },
1006
+ {
1007
+ "epoch": 0.2910121040432655,
1008
+ "grad_norm": 4.650808013267891,
1009
+ "learning_rate": 9.75356818201724e-05,
1010
+ "loss": 0.8032,
1011
+ "num_input_tokens_seen": 6608296,
1012
+ "step": 565
1013
+ },
1014
+ {
1015
+ "epoch": 0.2935874323976307,
1016
+ "grad_norm": 6.577223054225459,
1017
+ "learning_rate": 9.746919999858492e-05,
1018
+ "loss": 0.8081,
1019
+ "num_input_tokens_seen": 6666768,
1020
+ "step": 570
1021
+ },
1022
+ {
1023
+ "epoch": 0.2961627607519959,
1024
+ "grad_norm": 7.1732684079932545,
1025
+ "learning_rate": 9.740185657889357e-05,
1026
+ "loss": 0.8398,
1027
+ "num_input_tokens_seen": 6725248,
1028
+ "step": 575
1029
+ },
1030
+ {
1031
+ "epoch": 0.29873808910636107,
1032
+ "grad_norm": 21.451661035438484,
1033
+ "learning_rate": 9.733365278342696e-05,
1034
+ "loss": 0.8908,
1035
+ "num_input_tokens_seen": 6783680,
1036
+ "step": 580
1037
+ },
1038
+ {
1039
+ "epoch": 0.30131341746072626,
1040
+ "grad_norm": 4.031699151478832,
1041
+ "learning_rate": 9.726458985013017e-05,
1042
+ "loss": 0.8248,
1043
+ "num_input_tokens_seen": 6842144,
1044
+ "step": 585
1045
+ },
1046
+ {
1047
+ "epoch": 0.3038887458150914,
1048
+ "grad_norm": 3.45579530759462,
1049
+ "learning_rate": 9.719466903254215e-05,
1050
+ "loss": 0.829,
1051
+ "num_input_tokens_seen": 6900656,
1052
+ "step": 590
1053
+ },
1054
+ {
1055
+ "epoch": 0.3064640741694566,
1056
+ "grad_norm": 4.518719062630672,
1057
+ "learning_rate": 9.712389159977307e-05,
1058
+ "loss": 0.8269,
1059
+ "num_input_tokens_seen": 6959128,
1060
+ "step": 595
1061
+ },
1062
+ {
1063
+ "epoch": 0.3090394025238218,
1064
+ "grad_norm": 12.728221405806083,
1065
+ "learning_rate": 9.705225883648121e-05,
1066
+ "loss": 0.7997,
1067
+ "num_input_tokens_seen": 7017576,
1068
+ "step": 600
1069
+ },
1070
+ {
1071
+ "epoch": 0.3090394025238218,
1072
+ "eval_loss": 0.787663459777832,
1073
+ "eval_runtime": 19.6121,
1074
+ "eval_samples_per_second": 3.059,
1075
+ "eval_steps_per_second": 0.765,
1076
+ "num_input_tokens_seen": 7017576,
1077
+ "step": 600
1078
  }
1079
  ],
1080
  "logging_steps": 5,
1081
  "max_steps": 3882,
1082
+ "num_input_tokens_seen": 7017576,
1083
  "num_train_epochs": 2,
1084
  "save_steps": 50,
1085
  "stateful_callbacks": {
 
1094
  "attributes": {}
1095
  }
1096
  },
1097
+ "total_flos": 462990180876288.0,
1098
  "train_batch_size": 1,
1099
  "trial_name": null,
1100
  "trial_params": null