ales committed on
Commit
dce1c5a
·
1 Parent(s): 13aec20

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 0.33,
3
- "eval_loss": 0.5382302403450012,
4
- "eval_runtime": 19.0163,
5
  "eval_samples": 64,
6
- "eval_samples_per_second": 3.366,
7
- "eval_steps_per_second": 0.105,
8
- "eval_wer": 55.12820512820513,
9
- "train_loss": 0.13119232177734375,
10
- "train_runtime": 451.4438,
11
- "train_samples_per_second": 10.633,
12
- "train_steps_per_second": 0.332
13
  }
 
1
  {
2
+ "epoch": 0.25,
3
+ "eval_loss": 0.5364237427711487,
4
+ "eval_runtime": 16.6084,
5
  "eval_samples": 64,
6
+ "eval_samples_per_second": 3.853,
7
+ "eval_steps_per_second": 0.12,
8
+ "eval_wer": 54.57875457875458,
9
+ "train_loss": 0.0719480574131012,
10
+ "train_runtime": 406.2172,
11
+ "train_samples_per_second": 15.755,
12
+ "train_steps_per_second": 0.492
13
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.33,
3
- "eval_loss": 0.5382302403450012,
4
- "eval_runtime": 19.0163,
5
  "eval_samples": 64,
6
- "eval_samples_per_second": 3.366,
7
- "eval_steps_per_second": 0.105,
8
- "eval_wer": 55.12820512820513
9
  }
 
1
  {
2
+ "epoch": 0.25,
3
+ "eval_loss": 0.5364237427711487,
4
+ "eval_runtime": 16.6084,
5
  "eval_samples": 64,
6
+ "eval_samples_per_second": 3.853,
7
+ "eval_steps_per_second": 0.12,
8
+ "eval_wer": 54.57875457875458
9
  }
runs/Dec13_11-33-05_d7f040c448a8/events.out.tfevents.1670931860.d7f040c448a8.8569.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be20d1af189dd3a743d98769b5c7b0b572ea727fe663c5b4953c52758920bdfd
3
+ size 358
train.log CHANGED
@@ -83,3 +83,9 @@
83
  {'loss': 0.2844, 'learning_rate': 7.5500000000000006e-06, 'epoch': 0.25}
84
  {'eval_loss': 0.5341857671737671, 'eval_wer': 55.311355311355314, 'eval_runtime': 17.7172, 'eval_samples_per_second': 3.612, 'eval_steps_per_second': 0.113, 'epoch': 0.25}
85
  {'train_runtime': 406.2172, 'train_samples_per_second': 15.755, 'train_steps_per_second': 0.492, 'train_loss': 0.0719480574131012, 'epoch': 0.25}
 
 
 
 
 
 
 
83
  {'loss': 0.2844, 'learning_rate': 7.5500000000000006e-06, 'epoch': 0.25}
84
  {'eval_loss': 0.5341857671737671, 'eval_wer': 55.311355311355314, 'eval_runtime': 17.7172, 'eval_samples_per_second': 3.612, 'eval_steps_per_second': 0.113, 'epoch': 0.25}
85
  {'train_runtime': 406.2172, 'train_samples_per_second': 15.755, 'train_steps_per_second': 0.492, 'train_loss': 0.0719480574131012, 'epoch': 0.25}
86
+ ***** train metrics *****
87
+ epoch = 0.25
88
+ train_loss = 0.0719
89
+ train_runtime = 0:06:46.21
90
+ train_samples_per_second = 15.755
91
+ train_steps_per_second = 0.492
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 0.33,
3
- "train_loss": 0.13119232177734375,
4
- "train_runtime": 451.4438,
5
- "train_samples_per_second": 10.633,
6
- "train_steps_per_second": 0.332
7
  }
 
1
  {
2
+ "epoch": 0.25,
3
+ "train_loss": 0.0719480574131012,
4
+ "train_runtime": 406.2172,
5
+ "train_samples_per_second": 15.755,
6
+ "train_steps_per_second": 0.492
7
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 55.12820512820513,
3
- "best_model_checkpoint": "./checkpoint-140",
4
- "epoch": 0.3333333333333333,
5
- "global_step": 150,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -240,11 +240,95 @@
240
  "train_runtime": 451.4438,
241
  "train_samples_per_second": 10.633,
242
  "train_steps_per_second": 0.332
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
243
  }
244
  ],
245
- "max_steps": 150,
246
  "num_train_epochs": 9223372036854775807,
247
- "total_flos": 1.18170648576e+17,
248
  "trial_name": null,
249
  "trial_params": null
250
  }
 
1
  {
2
+ "best_metric": 54.57875457875458,
3
+ "best_model_checkpoint": "./checkpoint-160",
4
+ "epoch": 0.25,
5
+ "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
240
  "train_runtime": 451.4438,
241
  "train_samples_per_second": 10.633,
242
  "train_steps_per_second": 0.332
243
+ },
244
+ {
245
+ "epoch": 0.05,
246
+ "learning_rate": 9.5e-06,
247
+ "loss": 0.2716,
248
+ "step": 160
249
+ },
250
+ {
251
+ "epoch": 0.05,
252
+ "eval_loss": 0.5364237427711487,
253
+ "eval_runtime": 16.1176,
254
+ "eval_samples_per_second": 3.971,
255
+ "eval_steps_per_second": 0.124,
256
+ "eval_wer": 54.57875457875458,
257
+ "step": 160
258
+ },
259
+ {
260
+ "epoch": 0.1,
261
+ "learning_rate": 9.050000000000001e-06,
262
+ "loss": 0.2765,
263
+ "step": 170
264
+ },
265
+ {
266
+ "epoch": 0.1,
267
+ "eval_loss": 0.5404230952262878,
268
+ "eval_runtime": 17.8451,
269
+ "eval_samples_per_second": 3.586,
270
+ "eval_steps_per_second": 0.112,
271
+ "eval_wer": 54.761904761904766,
272
+ "step": 170
273
+ },
274
+ {
275
+ "epoch": 0.15,
276
+ "learning_rate": 8.550000000000001e-06,
277
+ "loss": 0.2533,
278
+ "step": 180
279
+ },
280
+ {
281
+ "epoch": 0.15,
282
+ "eval_loss": 0.53352290391922,
283
+ "eval_runtime": 17.1042,
284
+ "eval_samples_per_second": 3.742,
285
+ "eval_steps_per_second": 0.117,
286
+ "eval_wer": 55.12820512820513,
287
+ "step": 180
288
+ },
289
+ {
290
+ "epoch": 0.2,
291
+ "learning_rate": 8.050000000000001e-06,
292
+ "loss": 0.3533,
293
+ "step": 190
294
+ },
295
+ {
296
+ "epoch": 0.2,
297
+ "eval_loss": 0.530021071434021,
298
+ "eval_runtime": 18.1912,
299
+ "eval_samples_per_second": 3.518,
300
+ "eval_steps_per_second": 0.11,
301
+ "eval_wer": 56.59340659340659,
302
+ "step": 190
303
+ },
304
+ {
305
+ "epoch": 0.25,
306
+ "learning_rate": 7.5500000000000006e-06,
307
+ "loss": 0.2844,
308
+ "step": 200
309
+ },
310
+ {
311
+ "epoch": 0.25,
312
+ "eval_loss": 0.5341857671737671,
313
+ "eval_runtime": 17.7172,
314
+ "eval_samples_per_second": 3.612,
315
+ "eval_steps_per_second": 0.113,
316
+ "eval_wer": 55.311355311355314,
317
+ "step": 200
318
+ },
319
+ {
320
+ "epoch": 0.25,
321
+ "step": 200,
322
+ "total_flos": 1.57560864768e+17,
323
+ "train_loss": 0.0719480574131012,
324
+ "train_runtime": 406.2172,
325
+ "train_samples_per_second": 15.755,
326
+ "train_steps_per_second": 0.492
327
  }
328
  ],
329
+ "max_steps": 200,
330
  "num_train_epochs": 9223372036854775807,
331
+ "total_flos": 1.57560864768e+17,
332
  "trial_name": null,
333
  "trial_params": null
334
  }