ales committed on
Commit
9e9ec3b
·
1 Parent(s): 7f108b5

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_loss": 0.4685819447040558,
4
- "eval_runtime": 16.4606,
5
  "eval_samples": 64,
6
- "eval_samples_per_second": 3.888,
7
- "eval_steps_per_second": 0.122,
8
- "eval_wer": 51.28205128205128,
9
- "train_loss": 0.8012711083889008,
10
- "train_runtime": 2053.4009,
11
- "train_samples_per_second": 3.117,
12
- "train_steps_per_second": 0.097
13
  }
 
1
  {
2
+ "epoch": 0.33,
3
+ "eval_loss": 0.43880951404571533,
4
+ "eval_runtime": 16.7298,
5
  "eval_samples": 64,
6
+ "eval_samples_per_second": 3.825,
7
+ "eval_steps_per_second": 0.12,
8
+ "eval_wer": 46.52014652014652,
9
+ "train_loss": 0.10500287771224975,
10
+ "train_runtime": 1208.0467,
11
+ "train_samples_per_second": 7.947,
12
+ "train_steps_per_second": 0.248
13
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_loss": 0.4685819447040558,
4
- "eval_runtime": 16.4606,
5
  "eval_samples": 64,
6
- "eval_samples_per_second": 3.888,
7
- "eval_steps_per_second": 0.122,
8
- "eval_wer": 51.28205128205128
9
  }
 
1
  {
2
+ "epoch": 0.33,
3
+ "eval_loss": 0.43880951404571533,
4
+ "eval_runtime": 16.7298,
5
  "eval_samples": 64,
6
+ "eval_samples_per_second": 3.825,
7
+ "eval_steps_per_second": 0.12,
8
+ "eval_wer": 46.52014652014652
9
  }
runs/Dec13_14-14-43_d7f040c448a8/events.out.tfevents.1670942499.d7f040c448a8.61677.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:017da71b5fc49a675e692d2511d60d4312881697dcd047d467bbf1e3e0b6cbf5
3
+ size 358
train.log CHANGED
@@ -244,3 +244,9 @@
244
  {'loss': 0.2921, 'learning_rate': 1.0344827586206898e-06, 'epoch': 0.33}
245
  {'eval_loss': 0.4282010793685913, 'eval_wer': 46.7032967032967, 'eval_runtime': 18.1178, 'eval_samples_per_second': 3.532, 'eval_steps_per_second': 0.11, 'epoch': 0.33}
246
  {'train_runtime': 1208.0467, 'train_samples_per_second': 7.947, 'train_steps_per_second': 0.248, 'train_loss': 0.10500287771224975, 'epoch': 0.33}
 
 
 
 
 
 
 
244
  {'loss': 0.2921, 'learning_rate': 1.0344827586206898e-06, 'epoch': 0.33}
245
  {'eval_loss': 0.4282010793685913, 'eval_wer': 46.7032967032967, 'eval_runtime': 18.1178, 'eval_samples_per_second': 3.532, 'eval_steps_per_second': 0.11, 'epoch': 0.33}
246
  {'train_runtime': 1208.0467, 'train_samples_per_second': 7.947, 'train_steps_per_second': 0.248, 'train_loss': 0.10500287771224975, 'epoch': 0.33}
247
+ ***** train metrics *****
248
+ epoch = 0.33
249
+ train_loss = 0.105
250
+ train_runtime = 0:20:08.04
251
+ train_samples_per_second = 7.947
252
+ train_steps_per_second = 0.248
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 0.8012711083889008,
4
- "train_runtime": 2053.4009,
5
- "train_samples_per_second": 3.117,
6
- "train_steps_per_second": 0.097
7
  }
 
1
  {
2
+ "epoch": 0.33,
3
+ "train_loss": 0.10500287771224975,
4
+ "train_runtime": 1208.0467,
5
+ "train_samples_per_second": 7.947,
6
+ "train_steps_per_second": 0.248
7
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 51.28205128205128,
3
- "best_model_checkpoint": "./checkpoint-190",
4
- "epoch": 1.0,
5
- "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -314,18 +314,168 @@
314
  "step": 200
315
  },
316
  {
317
- "epoch": 1.0,
318
- "step": 200,
319
- "total_flos": 1.57560864768e+17,
320
- "train_loss": 0.8012711083889008,
321
- "train_runtime": 2053.4009,
322
- "train_samples_per_second": 3.117,
323
- "train_steps_per_second": 0.097
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
324
  }
325
  ],
326
- "max_steps": 200,
327
  "num_train_epochs": 9223372036854775807,
328
- "total_flos": 1.57560864768e+17,
329
  "trial_name": null,
330
  "trial_params": null
331
  }
 
1
  {
2
+ "best_metric": 46.52014652014652,
3
+ "best_model_checkpoint": "./checkpoint-280",
4
+ "epoch": 0.3333333333333333,
5
+ "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
314
  "step": 200
315
  },
316
  {
317
+ "epoch": 0.03,
318
+ "learning_rate": 3.206896551724138e-05,
319
+ "loss": 0.3956,
320
+ "step": 210
321
+ },
322
+ {
323
+ "epoch": 0.03,
324
+ "eval_loss": 0.46904227137565613,
325
+ "eval_runtime": 16.6622,
326
+ "eval_samples_per_second": 3.841,
327
+ "eval_steps_per_second": 0.12,
328
+ "eval_wer": 52.01465201465202,
329
+ "step": 210
330
+ },
331
+ {
332
+ "epoch": 0.07,
333
+ "learning_rate": 2.8620689655172417e-05,
334
+ "loss": 0.3719,
335
+ "step": 220
336
+ },
337
+ {
338
+ "epoch": 0.07,
339
+ "eval_loss": 0.4673417806625366,
340
+ "eval_runtime": 17.4548,
341
+ "eval_samples_per_second": 3.667,
342
+ "eval_steps_per_second": 0.115,
343
+ "eval_wer": 52.74725274725275,
344
+ "step": 220
345
+ },
346
+ {
347
+ "epoch": 0.1,
348
+ "learning_rate": 2.517241379310345e-05,
349
+ "loss": 0.3168,
350
+ "step": 230
351
+ },
352
+ {
353
+ "epoch": 0.1,
354
+ "eval_loss": 0.44994863867759705,
355
+ "eval_runtime": 17.0948,
356
+ "eval_samples_per_second": 3.744,
357
+ "eval_steps_per_second": 0.117,
358
+ "eval_wer": 51.46520146520146,
359
+ "step": 230
360
+ },
361
+ {
362
+ "epoch": 0.13,
363
+ "learning_rate": 2.1724137931034484e-05,
364
+ "loss": 0.3582,
365
+ "step": 240
366
+ },
367
+ {
368
+ "epoch": 0.13,
369
+ "eval_loss": 0.4524703323841095,
370
+ "eval_runtime": 17.7891,
371
+ "eval_samples_per_second": 3.598,
372
+ "eval_steps_per_second": 0.112,
373
+ "eval_wer": 46.88644688644688,
374
+ "step": 240
375
+ },
376
+ {
377
+ "epoch": 0.17,
378
+ "learning_rate": 1.827586206896552e-05,
379
+ "loss": 0.2475,
380
+ "step": 250
381
+ },
382
+ {
383
+ "epoch": 0.17,
384
+ "eval_loss": 0.46121472120285034,
385
+ "eval_runtime": 17.7044,
386
+ "eval_samples_per_second": 3.615,
387
+ "eval_steps_per_second": 0.113,
388
+ "eval_wer": 52.38095238095239,
389
+ "step": 250
390
+ },
391
+ {
392
+ "epoch": 0.2,
393
+ "learning_rate": 1.482758620689655e-05,
394
+ "loss": 0.2988,
395
+ "step": 260
396
+ },
397
+ {
398
+ "epoch": 0.2,
399
+ "eval_loss": 0.4346223473548889,
400
+ "eval_runtime": 19.6665,
401
+ "eval_samples_per_second": 3.254,
402
+ "eval_steps_per_second": 0.102,
403
+ "eval_wer": 49.81684981684982,
404
+ "step": 260
405
+ },
406
+ {
407
+ "epoch": 0.23,
408
+ "learning_rate": 1.1379310344827587e-05,
409
+ "loss": 0.2749,
410
+ "step": 270
411
+ },
412
+ {
413
+ "epoch": 0.23,
414
+ "eval_loss": 0.42485401034355164,
415
+ "eval_runtime": 17.6854,
416
+ "eval_samples_per_second": 3.619,
417
+ "eval_steps_per_second": 0.113,
418
+ "eval_wer": 48.9010989010989,
419
+ "step": 270
420
+ },
421
+ {
422
+ "epoch": 0.27,
423
+ "learning_rate": 7.93103448275862e-06,
424
+ "loss": 0.3368,
425
+ "step": 280
426
+ },
427
+ {
428
+ "epoch": 0.27,
429
+ "eval_loss": 0.43880951404571533,
430
+ "eval_runtime": 18.1285,
431
+ "eval_samples_per_second": 3.53,
432
+ "eval_steps_per_second": 0.11,
433
+ "eval_wer": 46.52014652014652,
434
+ "step": 280
435
+ },
436
+ {
437
+ "epoch": 0.3,
438
+ "learning_rate": 4.482758620689655e-06,
439
+ "loss": 0.2574,
440
+ "step": 290
441
+ },
442
+ {
443
+ "epoch": 0.3,
444
+ "eval_loss": 0.43085092306137085,
445
+ "eval_runtime": 18.1023,
446
+ "eval_samples_per_second": 3.535,
447
+ "eval_steps_per_second": 0.11,
448
+ "eval_wer": 46.7032967032967,
449
+ "step": 290
450
+ },
451
+ {
452
+ "epoch": 0.33,
453
+ "learning_rate": 1.0344827586206898e-06,
454
+ "loss": 0.2921,
455
+ "step": 300
456
+ },
457
+ {
458
+ "epoch": 0.33,
459
+ "eval_loss": 0.4282010793685913,
460
+ "eval_runtime": 18.1178,
461
+ "eval_samples_per_second": 3.532,
462
+ "eval_steps_per_second": 0.11,
463
+ "eval_wer": 46.7032967032967,
464
+ "step": 300
465
+ },
466
+ {
467
+ "epoch": 0.33,
468
+ "step": 300,
469
+ "total_flos": 2.36341297152e+17,
470
+ "train_loss": 0.10500287771224975,
471
+ "train_runtime": 1208.0467,
472
+ "train_samples_per_second": 7.947,
473
+ "train_steps_per_second": 0.248
474
  }
475
  ],
476
+ "max_steps": 300,
477
  "num_train_epochs": 9223372036854775807,
478
+ "total_flos": 2.36341297152e+17,
479
  "trial_name": null,
480
  "trial_params": null
481
  }