kooff11 commited on
Commit
aaf03f2
·
verified ·
1 Parent(s): 669bb9e

Training in progress, step 27, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcff3819f2d8f358549556f4ab228fb736c1d56e961ebb558cd3063a90ffb5e5
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f54285af820322f413bedf90a197fab9546824b126b677e531d7a3e34609c30c
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a992b6fdfdf746be8efbf1ff98980cad8d0f702da31dffa8aa721ccd2065560
3
  size 85723284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28c7adf268e87dd0a41446aaa874ecc5ab743fb9a94da50a96ececdd5a3bee8f
3
  size 85723284
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b0a960eb7cbc41ec4c0d6750e9eac466c9ca6eb509aa83556250ea111b5363b
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fc23edab8ba1108a86a5e66e050703f0e0017d14e4377e9cbd25e332b216c4b
3
  size 14512
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e71edc115305704ae35e847cbd40ad2b847e46942216a521d42b019f652edc5b
3
  size 14512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a43d18fc96a7898854e74a08413f3781a39c478df43b8b248b0577bb9030b97
3
  size 14512
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3e86b20a5b94cd46faa4d64e6cf42c67c97d41b2661a560693dd74f1d50d8d7b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2bb4b588419b60e1f59dda48eeb3b428275266887083bc814e29c779f54f5c8
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.518918918918919,
5
  "eval_steps": 9,
6
- "global_step": 18,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -157,6 +157,77 @@
157
  "eval_samples_per_second": 10.199,
158
  "eval_steps_per_second": 2.571,
159
  "step": 18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  }
161
  ],
162
  "logging_steps": 1,
@@ -176,7 +247,7 @@
176
  "attributes": {}
177
  }
178
  },
179
- "total_flos": 4.0500259124045414e+17,
180
  "train_batch_size": 2,
181
  "trial_name": null,
182
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7783783783783784,
5
  "eval_steps": 9,
6
+ "global_step": 27,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
157
  "eval_samples_per_second": 10.199,
158
  "eval_steps_per_second": 2.571,
159
  "step": 18
160
+ },
161
+ {
162
+ "epoch": 0.5477477477477477,
163
+ "grad_norm": 3.7183847427368164,
164
+ "learning_rate": 4.762090420881289e-05,
165
+ "loss": 24.4091,
166
+ "step": 19
167
+ },
168
+ {
169
+ "epoch": 0.5765765765765766,
170
+ "grad_norm": 3.6958751678466797,
171
+ "learning_rate": 4.288425808633575e-05,
172
+ "loss": 22.3664,
173
+ "step": 20
174
+ },
175
+ {
176
+ "epoch": 0.6054054054054054,
177
+ "grad_norm": 3.622276544570923,
178
+ "learning_rate": 3.821205322452863e-05,
179
+ "loss": 23.2693,
180
+ "step": 21
181
+ },
182
+ {
183
+ "epoch": 0.6342342342342342,
184
+ "grad_norm": 3.4962754249572754,
185
+ "learning_rate": 3.364660183412892e-05,
186
+ "loss": 23.2194,
187
+ "step": 22
188
+ },
189
+ {
190
+ "epoch": 0.6630630630630631,
191
+ "grad_norm": 4.327157020568848,
192
+ "learning_rate": 2.9229249349905684e-05,
193
+ "loss": 22.1926,
194
+ "step": 23
195
+ },
196
+ {
197
+ "epoch": 0.6918918918918919,
198
+ "grad_norm": 3.3497116565704346,
199
+ "learning_rate": 2.500000000000001e-05,
200
+ "loss": 23.5823,
201
+ "step": 24
202
+ },
203
+ {
204
+ "epoch": 0.7207207207207207,
205
+ "grad_norm": 3.5109105110168457,
206
+ "learning_rate": 2.09971545214401e-05,
207
+ "loss": 23.8332,
208
+ "step": 25
209
+ },
210
+ {
211
+ "epoch": 0.7495495495495496,
212
+ "grad_norm": 3.7513277530670166,
213
+ "learning_rate": 1.725696330273575e-05,
214
+ "loss": 22.0996,
215
+ "step": 26
216
+ },
217
+ {
218
+ "epoch": 0.7783783783783784,
219
+ "grad_norm": 3.768648147583008,
220
+ "learning_rate": 1.3813298094746491e-05,
221
+ "loss": 23.2716,
222
+ "step": 27
223
+ },
224
+ {
225
+ "epoch": 0.7783783783783784,
226
+ "eval_loss": 0.7317541241645813,
227
+ "eval_runtime": 22.9476,
228
+ "eval_samples_per_second": 10.197,
229
+ "eval_steps_per_second": 2.571,
230
+ "step": 27
231
  }
232
  ],
233
  "logging_steps": 1,
 
247
  "attributes": {}
248
  }
249
  },
250
+ "total_flos": 6.075038868606812e+17,
251
  "train_batch_size": 2,
252
  "trial_name": null,
253
  "trial_params": null