ippersky commited on
Commit
f9a418f
·
verified ·
1 Parent(s): d499641

Training in progress, step 1830, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c690532dde75d7f037320b5f3913514c7a0bff79f52688dcc983d5fb97dd97f
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3b3ca44ca71849aa9736bf2caa6c4a33b670570063ad036bc8fbcd3ed610f45
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecbdfd3f209ee1395730dd360cba7a898e53096ef2ca33f731a207501938165c
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:264c7200f301cadaabfb6edb2eb02ced5f68f63db4926b53e608150943e45424
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5e82f96b77a35761cfa17e364b44b3b766596ff201963b0dda148aa2e1b89fb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:735fd23f13374ea6c149d7a80aeb18d5231b8a21165e2180edd2ac1a60e8d824
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f3e57b087a882ce0fe38f97583d72cfa40b57e6b36b780759bd14633217f9fd
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae192710b7980b12a4f538a2ff8c8fb9bdd8115f7bf58fc2370cc0fc415fe3b7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 6.666951123247926,
3
  "best_model_checkpoint": "./whisper-small-accented-zh/checkpoint-1000",
4
- "epoch": 1.639344262295082,
5
  "eval_steps": 1000,
6
- "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -296,6 +296,237 @@
296
  "eval_steps_per_second": 0.173,
297
  "eval_wer": 6.666951123247926,
298
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  }
300
  ],
301
  "logging_steps": 25,
@@ -310,12 +541,12 @@
310
  "should_evaluate": false,
311
  "should_log": false,
312
  "should_save": true,
313
- "should_training_stop": false
314
  },
315
  "attributes": {}
316
  }
317
  },
318
- "total_flos": 4.61419196571648e+18,
319
  "train_batch_size": 16,
320
  "trial_name": null,
321
  "trial_params": null
 
1
  {
2
  "best_metric": 6.666951123247926,
3
  "best_model_checkpoint": "./whisper-small-accented-zh/checkpoint-1000",
4
+ "epoch": 3.0,
5
  "eval_steps": 1000,
6
+ "global_step": 1830,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
296
  "eval_steps_per_second": 0.173,
297
  "eval_wer": 6.666951123247926,
298
  "step": 1000
299
+ },
300
+ {
301
+ "epoch": 1.680327868852459,
302
+ "grad_norm": 2.7260067462921143,
303
+ "learning_rate": 6.067669172932331e-06,
304
+ "loss": 0.0887,
305
+ "step": 1025
306
+ },
307
+ {
308
+ "epoch": 1.721311475409836,
309
+ "grad_norm": 3.793546438217163,
310
+ "learning_rate": 5.879699248120301e-06,
311
+ "loss": 0.0805,
312
+ "step": 1050
313
+ },
314
+ {
315
+ "epoch": 1.762295081967213,
316
+ "grad_norm": 3.374661684036255,
317
+ "learning_rate": 5.6917293233082715e-06,
318
+ "loss": 0.0815,
319
+ "step": 1075
320
+ },
321
+ {
322
+ "epoch": 1.8032786885245902,
323
+ "grad_norm": 3.6348347663879395,
324
+ "learning_rate": 5.503759398496241e-06,
325
+ "loss": 0.0777,
326
+ "step": 1100
327
+ },
328
+ {
329
+ "epoch": 1.8442622950819674,
330
+ "grad_norm": 3.1936988830566406,
331
+ "learning_rate": 5.315789473684211e-06,
332
+ "loss": 0.0748,
333
+ "step": 1125
334
+ },
335
+ {
336
+ "epoch": 1.8852459016393444,
337
+ "grad_norm": 3.0702261924743652,
338
+ "learning_rate": 5.12781954887218e-06,
339
+ "loss": 0.0869,
340
+ "step": 1150
341
+ },
342
+ {
343
+ "epoch": 1.9262295081967213,
344
+ "grad_norm": 2.608741283416748,
345
+ "learning_rate": 4.9398496240601505e-06,
346
+ "loss": 0.0807,
347
+ "step": 1175
348
+ },
349
+ {
350
+ "epoch": 1.9672131147540983,
351
+ "grad_norm": 3.992398500442505,
352
+ "learning_rate": 4.751879699248121e-06,
353
+ "loss": 0.0743,
354
+ "step": 1200
355
+ },
356
+ {
357
+ "epoch": 2.0081967213114753,
358
+ "grad_norm": 2.5503103733062744,
359
+ "learning_rate": 4.56390977443609e-06,
360
+ "loss": 0.0613,
361
+ "step": 1225
362
+ },
363
+ {
364
+ "epoch": 2.0491803278688523,
365
+ "grad_norm": 2.326587438583374,
366
+ "learning_rate": 4.375939849624061e-06,
367
+ "loss": 0.0247,
368
+ "step": 1250
369
+ },
370
+ {
371
+ "epoch": 2.0901639344262297,
372
+ "grad_norm": 1.7671444416046143,
373
+ "learning_rate": 4.18796992481203e-06,
374
+ "loss": 0.0306,
375
+ "step": 1275
376
+ },
377
+ {
378
+ "epoch": 2.1311475409836067,
379
+ "grad_norm": 2.0086865425109863,
380
+ "learning_rate": 4.000000000000001e-06,
381
+ "loss": 0.0252,
382
+ "step": 1300
383
+ },
384
+ {
385
+ "epoch": 2.1721311475409837,
386
+ "grad_norm": 0.9309338331222534,
387
+ "learning_rate": 3.81203007518797e-06,
388
+ "loss": 0.0236,
389
+ "step": 1325
390
+ },
391
+ {
392
+ "epoch": 2.2131147540983607,
393
+ "grad_norm": 0.9044923782348633,
394
+ "learning_rate": 3.6240601503759406e-06,
395
+ "loss": 0.027,
396
+ "step": 1350
397
+ },
398
+ {
399
+ "epoch": 2.2540983606557377,
400
+ "grad_norm": 1.874260425567627,
401
+ "learning_rate": 3.43609022556391e-06,
402
+ "loss": 0.0298,
403
+ "step": 1375
404
+ },
405
+ {
406
+ "epoch": 2.2950819672131146,
407
+ "grad_norm": 0.8456152081489563,
408
+ "learning_rate": 3.24812030075188e-06,
409
+ "loss": 0.0207,
410
+ "step": 1400
411
+ },
412
+ {
413
+ "epoch": 2.3360655737704916,
414
+ "grad_norm": 1.6556758880615234,
415
+ "learning_rate": 3.06015037593985e-06,
416
+ "loss": 0.0349,
417
+ "step": 1425
418
+ },
419
+ {
420
+ "epoch": 2.3770491803278686,
421
+ "grad_norm": 1.3671510219573975,
422
+ "learning_rate": 2.8721804511278195e-06,
423
+ "loss": 0.0284,
424
+ "step": 1450
425
+ },
426
+ {
427
+ "epoch": 2.418032786885246,
428
+ "grad_norm": 1.1702046394348145,
429
+ "learning_rate": 2.68421052631579e-06,
430
+ "loss": 0.024,
431
+ "step": 1475
432
+ },
433
+ {
434
+ "epoch": 2.459016393442623,
435
+ "grad_norm": 2.4468085765838623,
436
+ "learning_rate": 2.4962406015037594e-06,
437
+ "loss": 0.0258,
438
+ "step": 1500
439
+ },
440
+ {
441
+ "epoch": 2.5,
442
+ "grad_norm": 1.201532244682312,
443
+ "learning_rate": 2.3082706766917294e-06,
444
+ "loss": 0.0254,
445
+ "step": 1525
446
+ },
447
+ {
448
+ "epoch": 2.540983606557377,
449
+ "grad_norm": 0.9549139738082886,
450
+ "learning_rate": 2.1203007518796993e-06,
451
+ "loss": 0.0256,
452
+ "step": 1550
453
+ },
454
+ {
455
+ "epoch": 2.581967213114754,
456
+ "grad_norm": 2.483381509780884,
457
+ "learning_rate": 1.9323308270676693e-06,
458
+ "loss": 0.0299,
459
+ "step": 1575
460
+ },
461
+ {
462
+ "epoch": 2.6229508196721314,
463
+ "grad_norm": 1.537619709968567,
464
+ "learning_rate": 1.7443609022556392e-06,
465
+ "loss": 0.0291,
466
+ "step": 1600
467
+ },
468
+ {
469
+ "epoch": 2.663934426229508,
470
+ "grad_norm": 1.5355826616287231,
471
+ "learning_rate": 1.5563909774436092e-06,
472
+ "loss": 0.0269,
473
+ "step": 1625
474
+ },
475
+ {
476
+ "epoch": 2.7049180327868854,
477
+ "grad_norm": 1.311331033706665,
478
+ "learning_rate": 1.3684210526315791e-06,
479
+ "loss": 0.0273,
480
+ "step": 1650
481
+ },
482
+ {
483
+ "epoch": 2.7459016393442623,
484
+ "grad_norm": 0.8623602390289307,
485
+ "learning_rate": 1.1804511278195489e-06,
486
+ "loss": 0.0219,
487
+ "step": 1675
488
+ },
489
+ {
490
+ "epoch": 2.7868852459016393,
491
+ "grad_norm": 1.6167572736740112,
492
+ "learning_rate": 9.924812030075188e-07,
493
+ "loss": 0.0303,
494
+ "step": 1700
495
+ },
496
+ {
497
+ "epoch": 2.8278688524590163,
498
+ "grad_norm": 2.8278274536132812,
499
+ "learning_rate": 8.045112781954888e-07,
500
+ "loss": 0.0245,
501
+ "step": 1725
502
+ },
503
+ {
504
+ "epoch": 2.8688524590163933,
505
+ "grad_norm": 2.3211958408355713,
506
+ "learning_rate": 6.165413533834587e-07,
507
+ "loss": 0.0241,
508
+ "step": 1750
509
+ },
510
+ {
511
+ "epoch": 2.9098360655737707,
512
+ "grad_norm": 1.8018407821655273,
513
+ "learning_rate": 4.285714285714286e-07,
514
+ "loss": 0.022,
515
+ "step": 1775
516
+ },
517
+ {
518
+ "epoch": 2.9508196721311473,
519
+ "grad_norm": 1.6640270948410034,
520
+ "learning_rate": 2.406015037593985e-07,
521
+ "loss": 0.025,
522
+ "step": 1800
523
+ },
524
+ {
525
+ "epoch": 2.9918032786885247,
526
+ "grad_norm": 0.6498773694038391,
527
+ "learning_rate": 5.263157894736842e-08,
528
+ "loss": 0.0234,
529
+ "step": 1825
530
  }
531
  ],
532
  "logging_steps": 25,
 
541
  "should_evaluate": false,
542
  "should_log": false,
543
  "should_save": true,
544
+ "should_training_stop": true
545
  },
546
  "attributes": {}
547
  }
548
  },
549
+ "total_flos": 8.44025720315904e+18,
550
  "train_batch_size": 16,
551
  "trial_name": null,
552
  "trial_params": null