Rakhman16 commited on
Commit
5874b6b
·
verified ·
1 Parent(s): b4c14b4

Training in progress, step 5500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ccdc97af0c664a598630c782a7b331756b66dbb34a0f09170e5fc260495d1b53
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2aa3dfa629d6f0a4b00e9aa0db2fda3e9ab1e4c509f72b23ef5457288b2e474f
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e5a38645e0ad06e7337b8afd1b8688354e44665215d46144294c375e1b14ec6
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd9c7ab2e3359d64280354b30eccac6666a4328bc54eac0eee212a29f2aaf43c
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f750e8c47e9e6edd21fa1108074fa273b123ea44b89fc5876f119d3a8a4022f1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a194bef12ceeecc6ad10e9d032a837c1a3c1db13b2c2253686b43518ae42503
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e794023d937cb42e6b824ae46ca100bd6dbedd5057ac527c5cededbfc6fc3265
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f66a57bd394806719fb22948b621f468b035d0a2c7c49f5f8b90d526b96103c4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.2013118416070938,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-5000",
4
- "epoch": 3.512469265893923,
5
  "eval_steps": 100,
6
- "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1107,6 +1107,116 @@
1107
  "eval_samples_per_second": 66.551,
1108
  "eval_steps_per_second": 2.089,
1109
  "step": 5000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1110
  }
1111
  ],
1112
  "logging_steps": 50,
@@ -1126,7 +1236,7 @@
1126
  "attributes": {}
1127
  }
1128
  },
1129
- "total_flos": 4.87097804685312e+16,
1130
  "train_batch_size": 32,
1131
  "trial_name": null,
1132
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.20129592716693878,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-5500",
4
+ "epoch": 3.8637161924833157,
5
  "eval_steps": 100,
6
+ "global_step": 5500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1107
  "eval_samples_per_second": 66.551,
1108
  "eval_steps_per_second": 2.089,
1109
  "step": 5000
1110
+ },
1111
+ {
1112
+ "epoch": 3.547593958552863,
1113
+ "grad_norm": 27274.357421875,
1114
+ "learning_rate": 3.383696416022488e-06,
1115
+ "loss": 0.1968,
1116
+ "step": 5050
1117
+ },
1118
+ {
1119
+ "epoch": 3.5827186512118017,
1120
+ "grad_norm": 26782.548828125,
1121
+ "learning_rate": 3.1201686577652844e-06,
1122
+ "loss": 0.1878,
1123
+ "step": 5100
1124
+ },
1125
+ {
1126
+ "epoch": 3.5827186512118017,
1127
+ "eval_loss": 0.20154449343681335,
1128
+ "eval_runtime": 67.1325,
1129
+ "eval_samples_per_second": 66.436,
1130
+ "eval_steps_per_second": 2.085,
1131
+ "step": 5100
1132
+ },
1133
+ {
1134
+ "epoch": 3.6178433438707414,
1135
+ "grad_norm": 18810.177734375,
1136
+ "learning_rate": 2.8566408995080814e-06,
1137
+ "loss": 0.1912,
1138
+ "step": 5150
1139
+ },
1140
+ {
1141
+ "epoch": 3.65296803652968,
1142
+ "grad_norm": 26744.78515625,
1143
+ "learning_rate": 2.593113141250879e-06,
1144
+ "loss": 0.1975,
1145
+ "step": 5200
1146
+ },
1147
+ {
1148
+ "epoch": 3.65296803652968,
1149
+ "eval_loss": 0.20147912204265594,
1150
+ "eval_runtime": 67.0091,
1151
+ "eval_samples_per_second": 66.558,
1152
+ "eval_steps_per_second": 2.089,
1153
+ "step": 5200
1154
+ },
1155
+ {
1156
+ "epoch": 3.68809272918862,
1157
+ "grad_norm": 23326.36328125,
1158
+ "learning_rate": 2.3295853829936753e-06,
1159
+ "loss": 0.1995,
1160
+ "step": 5250
1161
+ },
1162
+ {
1163
+ "epoch": 3.7232174218475587,
1164
+ "grad_norm": 21197.091796875,
1165
+ "learning_rate": 2.0660576247364723e-06,
1166
+ "loss": 0.1894,
1167
+ "step": 5300
1168
+ },
1169
+ {
1170
+ "epoch": 3.7232174218475587,
1171
+ "eval_loss": 0.20139345526695251,
1172
+ "eval_runtime": 66.9887,
1173
+ "eval_samples_per_second": 66.578,
1174
+ "eval_steps_per_second": 2.09,
1175
+ "step": 5300
1176
+ },
1177
+ {
1178
+ "epoch": 3.758342114506498,
1179
+ "grad_norm": 23258.3671875,
1180
+ "learning_rate": 1.8025298664792693e-06,
1181
+ "loss": 0.1941,
1182
+ "step": 5350
1183
+ },
1184
+ {
1185
+ "epoch": 3.793466807165437,
1186
+ "grad_norm": 25702.90234375,
1187
+ "learning_rate": 1.539002108222066e-06,
1188
+ "loss": 0.1952,
1189
+ "step": 5400
1190
+ },
1191
+ {
1192
+ "epoch": 3.793466807165437,
1193
+ "eval_loss": 0.20133435726165771,
1194
+ "eval_runtime": 67.0042,
1195
+ "eval_samples_per_second": 66.563,
1196
+ "eval_steps_per_second": 2.089,
1197
+ "step": 5400
1198
+ },
1199
+ {
1200
+ "epoch": 3.8285914998243764,
1201
+ "grad_norm": 22600.765625,
1202
+ "learning_rate": 1.275474349964863e-06,
1203
+ "loss": 0.1912,
1204
+ "step": 5450
1205
+ },
1206
+ {
1207
+ "epoch": 3.8637161924833157,
1208
+ "grad_norm": 25134.44921875,
1209
+ "learning_rate": 1.0119465917076597e-06,
1210
+ "loss": 0.197,
1211
+ "step": 5500
1212
+ },
1213
+ {
1214
+ "epoch": 3.8637161924833157,
1215
+ "eval_loss": 0.20129592716693878,
1216
+ "eval_runtime": 67.1868,
1217
+ "eval_samples_per_second": 66.382,
1218
+ "eval_steps_per_second": 2.084,
1219
+ "step": 5500
1220
  }
1221
  ],
1222
  "logging_steps": 50,
 
1236
  "attributes": {}
1237
  }
1238
  },
1239
+ "total_flos": 5.35814435930112e+16,
1240
  "train_batch_size": 32,
1241
  "trial_name": null,
1242
  "trial_params": null