boumehdi commited on
Commit
23d47d8
·
1 Parent(s): 21347a1

Upload 13 files

Browse files
Files changed (7) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. rng_state.pth +1 -1
  4. scaler.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +362 -1517
  7. training_args.bin +1 -1
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1aacd357196d2ccb7b69b7eabae19fa89e889c9564d29e8e8552c19f401158b0
3
  size 2490594117
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e622da60d23e94e07cdb99ff234aaefa76f7c6fb21c57c6ab61ba5d19f8b9c7
3
  size 2490594117
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea7b88a67ffa34b8ff340a18a3f92eda4b2cd4806c641c0324b8c8995cca408d
3
  size 1262168365
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4512cd385771733271cfd87d2c72759096c7a1a777295cc9b7fef89f555dfd2
3
  size 1262168365
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:811f2ca8b1e9e9b61905e518ff2bbfb86d07420b0db70afcaa4bb789c5fc7609
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:015a8d4a0507b7ea92a5d2b6e83f85108572344d34478a240dd0c974135c5e09
3
  size 14575
scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b75f99a5a4a4b6ae83f76610c6becc08f675166496a0e823528855b7dc030640
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db575b4427150600ee37f6c6a1a316680eb05224eb75e119839f9b6fbad60117
3
  size 557
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11fb4e3697e269e34809008650d5c221e8a285a7472a4da3e755dee9bf0e7da6
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ff0fa787e174ecf06cbb98b01670eaefa203dc8ba7e6a8d2ed74e188444dfb3
3
  size 627
trainer_state.json CHANGED
@@ -1,1771 +1,616 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 95.90163934426229,
5
- "global_step": 11700,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.82,
12
  "learning_rate": 0.0001,
13
- "loss": 0.0216,
14
  "step": 100
15
  },
16
  {
17
- "epoch": 0.82,
18
- "eval_loss": 0.31533992290496826,
19
- "eval_runtime": 207.6066,
20
- "eval_samples_per_second": 16.782,
21
- "eval_steps_per_second": 2.1,
22
- "eval_wer": 0.21548575602629658,
23
  "step": 100
24
  },
25
  {
26
- "epoch": 1.64,
27
- "learning_rate": 9.991796554552912e-05,
28
- "loss": 0.0195,
29
  "step": 200
30
  },
31
  {
32
- "epoch": 1.64,
33
- "eval_loss": 0.33585426211357117,
34
- "eval_runtime": 141.4442,
35
- "eval_samples_per_second": 24.632,
36
- "eval_steps_per_second": 3.082,
37
- "eval_wer": 0.21469910659099847,
38
  "step": 200
39
  },
40
  {
41
- "epoch": 2.46,
42
- "learning_rate": 9.983593109105825e-05,
43
- "loss": 0.0213,
44
  "step": 300
45
  },
46
  {
47
- "epoch": 2.46,
48
- "eval_loss": 0.32860827445983887,
49
- "eval_runtime": 143.9808,
50
- "eval_samples_per_second": 24.198,
51
- "eval_steps_per_second": 3.028,
52
- "eval_wer": 0.21891329999438108,
53
  "step": 300
54
  },
55
  {
56
- "epoch": 3.28,
57
- "learning_rate": 9.975389663658737e-05,
58
- "loss": 0.0207,
59
  "step": 400
60
  },
61
  {
62
- "epoch": 3.28,
63
- "eval_loss": 0.34261101484298706,
64
- "eval_runtime": 151.928,
65
- "eval_samples_per_second": 22.932,
66
- "eval_steps_per_second": 2.87,
67
- "eval_wer": 0.21542956678091812,
68
  "step": 400
69
  },
70
  {
71
- "epoch": 4.1,
72
- "learning_rate": 9.967186218211649e-05,
73
- "loss": 0.0218,
74
  "step": 500
75
  },
76
  {
77
- "epoch": 4.1,
78
- "eval_loss": 0.3265298306941986,
79
- "eval_runtime": 157.208,
80
- "eval_samples_per_second": 22.162,
81
- "eval_steps_per_second": 2.773,
82
- "eval_wer": 0.2182952182952183,
83
  "step": 500
84
  },
85
  {
86
- "epoch": 4.92,
87
- "learning_rate": 9.958982772764562e-05,
88
- "loss": 0.0197,
89
  "step": 600
90
  },
91
  {
92
- "epoch": 4.92,
93
- "eval_loss": 0.3343443274497986,
94
- "eval_runtime": 155.21,
95
- "eval_samples_per_second": 22.447,
96
- "eval_steps_per_second": 2.809,
97
- "eval_wer": 0.22161038377254594,
98
  "step": 600
99
  },
100
  {
101
- "epoch": 5.74,
102
- "learning_rate": 9.950779327317475e-05,
103
- "loss": 0.0199,
104
  "step": 700
105
  },
106
  {
107
- "epoch": 5.74,
108
- "eval_loss": 0.34997832775115967,
109
- "eval_runtime": 162.2341,
110
- "eval_samples_per_second": 21.475,
111
- "eval_steps_per_second": 2.687,
112
- "eval_wer": 0.21885711074900263,
113
  "step": 700
114
  },
115
  {
116
- "epoch": 6.56,
117
- "learning_rate": 9.942575881870387e-05,
118
- "loss": 0.0208,
119
  "step": 800
120
  },
121
  {
122
- "epoch": 6.56,
123
- "eval_loss": 0.3275943994522095,
124
- "eval_runtime": 159.135,
125
- "eval_samples_per_second": 21.893,
126
- "eval_steps_per_second": 2.74,
127
- "eval_wer": 0.22048659886497723,
128
  "step": 800
129
  },
130
  {
131
- "epoch": 7.38,
132
- "learning_rate": 9.934372436423298e-05,
133
- "loss": 0.0206,
134
  "step": 900
135
  },
136
  {
137
- "epoch": 7.38,
138
- "eval_loss": 0.3496840000152588,
139
- "eval_runtime": 160.165,
140
- "eval_samples_per_second": 21.753,
141
- "eval_steps_per_second": 2.722,
142
- "eval_wer": 0.21329437545653762,
143
  "step": 900
144
  },
145
  {
146
- "epoch": 8.2,
147
- "learning_rate": 9.92616899097621e-05,
148
- "loss": 0.0195,
149
  "step": 1000
150
  },
151
  {
152
- "epoch": 8.2,
153
- "eval_loss": 0.3464973568916321,
154
- "eval_runtime": 159.4572,
155
- "eval_samples_per_second": 21.849,
156
- "eval_steps_per_second": 2.734,
157
- "eval_wer": 0.22172276226330281,
158
  "step": 1000
159
  },
160
  {
161
- "epoch": 9.02,
162
- "learning_rate": 9.917965545529123e-05,
163
- "loss": 0.0215,
164
  "step": 1100
165
  },
166
  {
167
- "epoch": 9.02,
168
- "eval_loss": 0.34134212136268616,
169
- "eval_runtime": 161.922,
170
- "eval_samples_per_second": 21.517,
171
- "eval_steps_per_second": 2.693,
172
- "eval_wer": 0.22599314491206382,
173
  "step": 1100
174
  },
175
  {
176
- "epoch": 9.84,
177
- "learning_rate": 9.909762100082035e-05,
178
- "loss": 0.0222,
179
  "step": 1200
180
  },
181
  {
182
- "epoch": 9.84,
183
- "eval_loss": 0.34097427129745483,
184
- "eval_runtime": 163.193,
185
- "eval_samples_per_second": 21.349,
186
- "eval_steps_per_second": 2.672,
187
- "eval_wer": 0.22852166095409338,
188
  "step": 1200
189
  },
190
  {
191
- "epoch": 10.66,
192
- "learning_rate": 9.901558654634947e-05,
193
- "loss": 0.0214,
194
  "step": 1300
195
  },
196
  {
197
- "epoch": 10.66,
198
- "eval_loss": 0.3526441752910614,
199
- "eval_runtime": 167.13,
200
- "eval_samples_per_second": 20.846,
201
- "eval_steps_per_second": 2.609,
202
- "eval_wer": 0.2244760352868461,
203
  "step": 1300
204
  },
205
  {
206
- "epoch": 11.48,
207
- "learning_rate": 9.893355209187858e-05,
208
- "loss": 0.0214,
209
  "step": 1400
210
  },
211
  {
212
- "epoch": 11.48,
213
- "eval_loss": 0.34132060408592224,
214
- "eval_runtime": 193.5042,
215
- "eval_samples_per_second": 18.005,
216
- "eval_steps_per_second": 2.253,
217
- "eval_wer": 0.2203742203742204,
218
  "step": 1400
219
  },
220
  {
221
- "epoch": 12.3,
222
- "learning_rate": 9.885151763740772e-05,
223
- "loss": 0.0222,
224
  "step": 1500
225
  },
226
  {
227
- "epoch": 12.3,
228
- "eval_loss": 0.34180501103401184,
229
- "eval_runtime": 195.2175,
230
- "eval_samples_per_second": 17.847,
231
- "eval_steps_per_second": 2.233,
232
- "eval_wer": 0.22211608698095184,
233
  "step": 1500
234
  },
235
  {
236
- "epoch": 13.11,
237
- "learning_rate": 9.876948318293683e-05,
238
- "loss": 0.024,
239
  "step": 1600
240
  },
241
  {
242
- "epoch": 13.11,
243
- "eval_loss": 0.3383408486843109,
244
- "eval_runtime": 197.1656,
245
- "eval_samples_per_second": 17.67,
246
- "eval_steps_per_second": 2.211,
247
- "eval_wer": 0.22352081811541272,
248
  "step": 1600
249
  },
250
  {
251
- "epoch": 13.93,
252
- "learning_rate": 9.868744872846596e-05,
253
- "loss": 0.0214,
254
  "step": 1700
255
  },
256
  {
257
- "epoch": 13.93,
258
- "eval_loss": 0.3334263563156128,
259
- "eval_runtime": 193.741,
260
- "eval_samples_per_second": 17.983,
261
- "eval_steps_per_second": 2.25,
262
- "eval_wer": 0.21632859470697308,
263
  "step": 1700
264
  },
265
  {
266
- "epoch": 14.75,
267
- "learning_rate": 9.860541427399508e-05,
268
- "loss": 0.02,
269
  "step": 1800
270
  },
271
  {
272
- "epoch": 14.75,
273
- "eval_loss": 0.336081862449646,
274
- "eval_runtime": 196.0013,
275
- "eval_samples_per_second": 17.775,
276
- "eval_steps_per_second": 2.224,
277
- "eval_wer": 0.2171714333876496,
278
  "step": 1800
279
  },
280
  {
281
- "epoch": 15.57,
282
- "learning_rate": 9.852337981952421e-05,
283
- "loss": 0.0196,
284
  "step": 1900
285
  },
286
  {
287
- "epoch": 15.57,
288
- "eval_loss": 0.33557742834091187,
289
- "eval_runtime": 195.9355,
290
- "eval_samples_per_second": 17.781,
291
- "eval_steps_per_second": 2.225,
292
- "eval_wer": 0.2231836826431421,
293
  "step": 1900
294
  },
295
  {
296
- "epoch": 16.39,
297
- "learning_rate": 9.844134536505333e-05,
298
- "loss": 0.0192,
299
  "step": 2000
300
  },
301
  {
302
- "epoch": 16.39,
303
- "eval_loss": 0.34101295471191406,
304
- "eval_runtime": 196.0796,
305
- "eval_samples_per_second": 17.768,
306
- "eval_steps_per_second": 2.224,
307
- "eval_wer": 0.21891329999438108,
308
  "step": 2000
309
  },
310
  {
311
- "epoch": 17.21,
312
- "learning_rate": 9.835931091058245e-05,
313
- "loss": 0.0194,
314
  "step": 2100
315
  },
316
  {
317
- "epoch": 17.21,
318
- "eval_loss": 0.33082982897758484,
319
- "eval_runtime": 197.1446,
320
- "eval_samples_per_second": 17.672,
321
- "eval_steps_per_second": 2.212,
322
- "eval_wer": 0.22065516660111253,
323
  "step": 2100
324
  },
325
  {
326
- "epoch": 18.03,
327
- "learning_rate": 9.827727645611156e-05,
328
- "loss": 0.0185,
329
  "step": 2200
330
  },
331
  {
332
- "epoch": 18.03,
333
- "eval_loss": 0.32813236117362976,
334
- "eval_runtime": 196.5485,
335
- "eval_samples_per_second": 17.726,
336
  "eval_steps_per_second": 2.218,
337
- "eval_wer": 0.21891329999438108,
338
  "step": 2200
339
  },
340
  {
341
- "epoch": 18.85,
342
- "learning_rate": 9.81952420016407e-05,
343
- "loss": 0.0193,
344
  "step": 2300
345
  },
346
  {
347
- "epoch": 18.85,
348
- "eval_loss": 0.33049654960632324,
349
- "eval_runtime": 199.567,
350
- "eval_samples_per_second": 17.458,
351
- "eval_steps_per_second": 2.185,
352
- "eval_wer": 0.22020565263808506,
353
  "step": 2300
354
  },
355
  {
356
- "epoch": 19.67,
357
- "learning_rate": 9.811320754716981e-05,
358
- "loss": 0.0187,
359
  "step": 2400
360
  },
361
  {
362
- "epoch": 19.67,
363
- "eval_loss": 0.32034164667129517,
364
- "eval_runtime": 194.435,
365
- "eval_samples_per_second": 17.919,
366
- "eval_steps_per_second": 2.242,
367
- "eval_wer": 0.2191380569758948,
368
  "step": 2400
369
  },
370
  {
371
- "epoch": 20.49,
372
- "learning_rate": 9.803117309269893e-05,
373
- "loss": 0.0198,
374
  "step": 2500
375
  },
376
  {
377
- "epoch": 20.49,
378
- "eval_loss": 0.3243141174316406,
379
- "eval_runtime": 195.3165,
380
- "eval_samples_per_second": 17.838,
381
- "eval_steps_per_second": 2.232,
382
- "eval_wer": 0.21981232792043603,
383
  "step": 2500
384
  },
385
  {
386
- "epoch": 21.31,
387
- "learning_rate": 9.794995898277277e-05,
388
- "loss": 0.02,
389
  "step": 2600
390
  },
391
  {
392
- "epoch": 21.31,
393
- "eval_loss": 0.34270626306533813,
394
- "eval_runtime": 198.8904,
395
- "eval_samples_per_second": 17.517,
396
- "eval_steps_per_second": 2.192,
397
- "eval_wer": 0.22082373433724786,
398
  "step": 2600
399
  },
400
  {
401
- "epoch": 22.13,
402
- "learning_rate": 9.786792452830189e-05,
403
- "loss": 0.0183,
404
  "step": 2700
405
  },
406
  {
407
- "epoch": 22.13,
408
- "eval_loss": 0.3502834141254425,
409
- "eval_runtime": 185.3267,
410
- "eval_samples_per_second": 18.799,
411
- "eval_steps_per_second": 2.353,
412
- "eval_wer": 0.21874473225824578,
413
  "step": 2700
414
  },
415
  {
416
- "epoch": 22.95,
417
- "learning_rate": 9.7785890073831e-05,
418
- "loss": 0.0171,
419
  "step": 2800
420
  },
421
  {
422
- "epoch": 22.95,
423
- "eval_loss": 0.3397138714790344,
424
- "eval_runtime": 200.1956,
425
- "eval_samples_per_second": 17.403,
426
- "eval_steps_per_second": 2.178,
427
- "eval_wer": 0.22228465471708714,
428
  "step": 2800
429
  },
430
  {
431
- "epoch": 23.77,
432
- "learning_rate": 9.770385561936014e-05,
433
- "loss": 0.018,
434
  "step": 2900
435
  },
436
  {
437
- "epoch": 23.77,
438
- "eval_loss": 0.3545791208744049,
439
- "eval_runtime": 199.5635,
440
- "eval_samples_per_second": 17.458,
441
- "eval_steps_per_second": 2.185,
442
- "eval_wer": 0.2228465471708715,
443
  "step": 2900
444
  },
445
  {
446
- "epoch": 24.59,
447
- "learning_rate": 9.762182116488927e-05,
448
- "loss": 0.0175,
449
  "step": 3000
450
  },
451
  {
452
- "epoch": 24.59,
453
- "eval_loss": 0.33466094732284546,
454
- "eval_runtime": 198.0419,
455
- "eval_samples_per_second": 17.592,
456
- "eval_steps_per_second": 2.202,
457
- "eval_wer": 0.21958757093892228,
458
  "step": 3000
459
  },
460
  {
461
- "epoch": 25.41,
462
- "learning_rate": 9.753978671041838e-05,
463
- "loss": 0.0189,
464
  "step": 3100
465
  },
466
  {
467
- "epoch": 25.41,
468
- "eval_loss": 0.3368154764175415,
469
- "eval_runtime": 203.168,
470
- "eval_samples_per_second": 17.148,
471
- "eval_steps_per_second": 2.146,
472
- "eval_wer": 0.21958757093892228,
473
  "step": 3100
474
  },
475
  {
476
- "epoch": 26.23,
477
- "learning_rate": 9.74577522559475e-05,
478
- "loss": 0.0176,
479
  "step": 3200
480
  },
481
  {
482
- "epoch": 26.23,
483
- "eval_loss": 0.34034982323646545,
484
- "eval_runtime": 195.3083,
485
- "eval_samples_per_second": 17.838,
486
- "eval_steps_per_second": 2.232,
487
- "eval_wer": 0.2207675450918694,
488
  "step": 3200
489
  },
490
  {
491
- "epoch": 27.05,
492
- "learning_rate": 9.737571780147663e-05,
493
- "loss": 0.0191,
494
  "step": 3300
495
  },
496
  {
497
- "epoch": 27.05,
498
- "eval_loss": 0.3415772616863251,
499
- "eval_runtime": 201.8077,
500
- "eval_samples_per_second": 17.264,
501
- "eval_steps_per_second": 2.16,
502
- "eval_wer": 0.2209361128280047,
503
  "step": 3300
504
  },
505
  {
506
- "epoch": 27.87,
507
- "learning_rate": 9.729368334700575e-05,
508
- "loss": 0.0203,
509
  "step": 3400
510
  },
511
  {
512
- "epoch": 27.87,
513
- "eval_loss": 0.3269742429256439,
514
- "eval_runtime": 198.0471,
515
- "eval_samples_per_second": 17.592,
516
- "eval_steps_per_second": 2.201,
517
- "eval_wer": 0.21840759678597516,
518
  "step": 3400
519
  },
520
  {
521
- "epoch": 28.69,
522
- "learning_rate": 9.721164889253487e-05,
523
- "loss": 0.0178,
524
  "step": 3500
525
  },
526
  {
527
- "epoch": 28.69,
528
- "eval_loss": 0.3441150188446045,
529
- "eval_runtime": 198.5015,
530
- "eval_samples_per_second": 17.552,
531
- "eval_steps_per_second": 2.196,
532
- "eval_wer": 0.22155419452716751,
533
  "step": 3500
534
  },
535
  {
536
- "epoch": 29.51,
537
- "learning_rate": 9.712961443806398e-05,
538
- "loss": 0.0178,
539
  "step": 3600
540
  },
541
  {
542
- "epoch": 29.51,
543
- "eval_loss": 0.3359436094760895,
544
- "eval_runtime": 196.1391,
545
- "eval_samples_per_second": 17.763,
546
- "eval_steps_per_second": 2.223,
547
- "eval_wer": 0.22172276226330281,
548
  "step": 3600
549
  },
550
  {
551
- "epoch": 30.33,
552
- "learning_rate": 9.704757998359312e-05,
553
- "loss": 0.0202,
554
  "step": 3700
555
  },
556
  {
557
- "epoch": 30.33,
558
- "eval_loss": 0.3397650122642517,
559
- "eval_runtime": 196.1562,
560
- "eval_samples_per_second": 17.761,
561
- "eval_steps_per_second": 2.223,
562
- "eval_wer": 0.22503792774063044,
563
  "step": 3700
564
  },
565
  {
566
- "epoch": 31.15,
567
- "learning_rate": 9.696554552912223e-05,
568
- "loss": 0.0203,
569
  "step": 3800
570
  },
571
  {
572
- "epoch": 31.15,
573
- "eval_loss": 0.32521852850914,
574
- "eval_runtime": 197.1456,
575
- "eval_samples_per_second": 17.672,
576
- "eval_steps_per_second": 2.212,
577
- "eval_wer": 0.22571219868517164,
578
  "step": 3800
579
  },
580
  {
581
- "epoch": 31.97,
582
- "learning_rate": 9.688351107465135e-05,
583
- "loss": 0.0221,
584
  "step": 3900
585
  },
586
  {
587
- "epoch": 31.97,
588
- "eval_loss": 0.3298187553882599,
589
- "eval_runtime": 198.9372,
590
- "eval_samples_per_second": 17.513,
591
- "eval_steps_per_second": 2.192,
592
- "eval_wer": 0.22728549755576782,
593
  "step": 3900
594
  },
595
  {
596
- "epoch": 32.79,
597
- "learning_rate": 9.680147662018048e-05,
598
- "loss": 0.0206,
599
  "step": 4000
600
  },
601
  {
602
- "epoch": 32.79,
603
- "eval_loss": 0.3412954807281494,
604
- "eval_runtime": 195.7736,
605
- "eval_samples_per_second": 17.796,
606
- "eval_steps_per_second": 2.227,
607
- "eval_wer": 0.22363319660616957,
608
  "step": 4000
609
- },
610
- {
611
- "epoch": 33.61,
612
- "learning_rate": 9.671944216570961e-05,
613
- "loss": 0.0207,
614
- "step": 4100
615
- },
616
- {
617
- "epoch": 33.61,
618
- "eval_loss": 0.3309689164161682,
619
- "eval_runtime": 201.1531,
620
- "eval_samples_per_second": 17.32,
621
- "eval_steps_per_second": 2.168,
622
- "eval_wer": 0.2248131707591167,
623
- "step": 4100
624
- },
625
- {
626
- "epoch": 34.43,
627
- "learning_rate": 9.663740771123873e-05,
628
- "loss": 0.018,
629
- "step": 4200
630
- },
631
- {
632
- "epoch": 34.43,
633
- "eval_loss": 0.34882888197898865,
634
- "eval_runtime": 197.3922,
635
- "eval_samples_per_second": 17.65,
636
- "eval_steps_per_second": 2.209,
637
- "eval_wer": 0.22329606113389897,
638
- "step": 4200
639
- },
640
- {
641
- "epoch": 35.25,
642
- "learning_rate": 9.655537325676785e-05,
643
- "loss": 0.0193,
644
- "step": 4300
645
- },
646
- {
647
- "epoch": 35.25,
648
- "eval_loss": 0.30975356698036194,
649
- "eval_runtime": 183.1875,
650
- "eval_samples_per_second": 19.019,
651
- "eval_steps_per_second": 2.38,
652
- "eval_wer": 0.22357700736079114,
653
- "step": 4300
654
- },
655
- {
656
- "epoch": 36.07,
657
- "learning_rate": 9.647333880229696e-05,
658
- "loss": 0.0193,
659
- "step": 4400
660
- },
661
- {
662
- "epoch": 36.07,
663
- "eval_loss": 0.33493199944496155,
664
- "eval_runtime": 183.8125,
665
- "eval_samples_per_second": 18.954,
666
- "eval_steps_per_second": 2.372,
667
- "eval_wer": 0.2226779794347362,
668
- "step": 4400
669
- },
670
- {
671
- "epoch": 36.89,
672
- "learning_rate": 9.63913043478261e-05,
673
- "loss": 0.0179,
674
- "step": 4500
675
- },
676
- {
677
- "epoch": 36.89,
678
- "eval_loss": 0.339138388633728,
679
- "eval_runtime": 183.2812,
680
- "eval_samples_per_second": 19.009,
681
- "eval_steps_per_second": 2.379,
682
- "eval_wer": 0.2220598977355734,
683
- "step": 4500
684
- },
685
- {
686
- "epoch": 37.7,
687
- "learning_rate": 9.630926989335521e-05,
688
- "loss": 0.0188,
689
- "step": 4600
690
- },
691
- {
692
- "epoch": 37.7,
693
- "eval_loss": 0.3318282663822174,
694
- "eval_runtime": 186.1719,
695
- "eval_samples_per_second": 18.714,
696
- "eval_steps_per_second": 2.342,
697
- "eval_wer": 0.22408271056919707,
698
- "step": 4600
699
- },
700
- {
701
- "epoch": 38.52,
702
- "learning_rate": 9.622805578342905e-05,
703
- "loss": 0.0191,
704
- "step": 4700
705
- },
706
- {
707
- "epoch": 38.52,
708
- "eval_loss": 0.3265712261199951,
709
- "eval_runtime": 184.3594,
710
- "eval_samples_per_second": 18.898,
711
- "eval_steps_per_second": 2.365,
712
- "eval_wer": 0.2248131707591167,
713
- "step": 4700
714
- },
715
- {
716
- "epoch": 39.34,
717
- "learning_rate": 9.614602132895817e-05,
718
- "loss": 0.02,
719
- "step": 4800
720
- },
721
- {
722
- "epoch": 39.34,
723
- "eval_loss": 0.32977160811424255,
724
- "eval_runtime": 183.375,
725
- "eval_samples_per_second": 18.999,
726
- "eval_steps_per_second": 2.378,
727
- "eval_wer": 0.22335225037927742,
728
- "step": 4800
729
- },
730
- {
731
- "epoch": 40.16,
732
- "learning_rate": 9.606398687448729e-05,
733
- "loss": 0.0194,
734
- "step": 4900
735
- },
736
- {
737
- "epoch": 40.16,
738
- "eval_loss": 0.33653536438941956,
739
- "eval_runtime": 182.625,
740
- "eval_samples_per_second": 19.077,
741
- "eval_steps_per_second": 2.387,
742
- "eval_wer": 0.22520649547676574,
743
- "step": 4900
744
- },
745
- {
746
- "epoch": 40.98,
747
- "learning_rate": 9.59819524200164e-05,
748
- "loss": 0.019,
749
- "step": 5000
750
- },
751
- {
752
- "epoch": 40.98,
753
- "eval_loss": 0.3455631732940674,
754
- "eval_runtime": 185.0148,
755
- "eval_samples_per_second": 18.831,
756
- "eval_steps_per_second": 2.357,
757
- "eval_wer": 0.21998089565657133,
758
- "step": 5000
759
- },
760
- {
761
- "epoch": 41.8,
762
- "learning_rate": 9.589991796554553e-05,
763
- "loss": 0.026,
764
- "step": 5100
765
- },
766
- {
767
- "epoch": 41.8,
768
- "eval_loss": 0.31202027201652527,
769
- "eval_runtime": 187.5,
770
- "eval_samples_per_second": 18.581,
771
- "eval_steps_per_second": 2.325,
772
- "eval_wer": 0.21953138169354386,
773
- "step": 5100
774
- },
775
- {
776
- "epoch": 42.62,
777
- "learning_rate": 9.581788351107465e-05,
778
- "loss": 0.0247,
779
- "step": 5200
780
- },
781
- {
782
- "epoch": 42.62,
783
- "eval_loss": 0.3445728123188019,
784
- "eval_runtime": 183.4063,
785
- "eval_samples_per_second": 18.996,
786
- "eval_steps_per_second": 2.377,
787
- "eval_wer": 0.22082373433724786,
788
- "step": 5200
789
- },
790
- {
791
- "epoch": 43.44,
792
- "learning_rate": 9.573584905660378e-05,
793
- "loss": 0.024,
794
- "step": 5300
795
- },
796
- {
797
- "epoch": 43.44,
798
- "eval_loss": 0.3246464431285858,
799
- "eval_runtime": 197.7008,
800
- "eval_samples_per_second": 17.623,
801
- "eval_steps_per_second": 2.205,
802
- "eval_wer": 0.2205989773557341,
803
- "step": 5300
804
- },
805
- {
806
- "epoch": 44.26,
807
- "learning_rate": 9.56538146021329e-05,
808
- "loss": 0.0221,
809
- "step": 5400
810
- },
811
- {
812
- "epoch": 44.26,
813
- "eval_loss": 0.3357507586479187,
814
- "eval_runtime": 200.2422,
815
- "eval_samples_per_second": 17.399,
816
- "eval_steps_per_second": 2.177,
817
- "eval_wer": 0.21981232792043603,
818
- "step": 5400
819
- },
820
- {
821
- "epoch": 45.08,
822
- "learning_rate": 9.557178014766203e-05,
823
- "loss": 0.023,
824
- "step": 5500
825
- },
826
- {
827
- "epoch": 45.08,
828
- "eval_loss": 0.33350640535354614,
829
- "eval_runtime": 200.3914,
830
- "eval_samples_per_second": 17.386,
831
- "eval_steps_per_second": 2.176,
832
- "eval_wer": 0.22571219868517164,
833
- "step": 5500
834
- },
835
- {
836
- "epoch": 45.9,
837
- "learning_rate": 9.548974569319115e-05,
838
- "loss": 0.0239,
839
- "step": 5600
840
- },
841
- {
842
- "epoch": 45.9,
843
- "eval_loss": 0.32670244574546814,
844
- "eval_runtime": 195.9769,
845
- "eval_samples_per_second": 17.778,
846
- "eval_steps_per_second": 2.225,
847
- "eval_wer": 0.22020565263808506,
848
- "step": 5600
849
- },
850
- {
851
- "epoch": 46.72,
852
- "learning_rate": 9.540771123872027e-05,
853
- "loss": 0.023,
854
- "step": 5700
855
- },
856
- {
857
- "epoch": 46.72,
858
- "eval_loss": 0.33632007241249084,
859
- "eval_runtime": 198.7025,
860
- "eval_samples_per_second": 17.534,
861
- "eval_steps_per_second": 2.194,
862
- "eval_wer": 0.2231836826431421,
863
- "step": 5700
864
- },
865
- {
866
- "epoch": 47.54,
867
- "learning_rate": 9.532567678424938e-05,
868
- "loss": 0.0237,
869
- "step": 5800
870
- },
871
- {
872
- "epoch": 47.54,
873
- "eval_loss": 0.35056358575820923,
874
- "eval_runtime": 199.1109,
875
- "eval_samples_per_second": 17.498,
876
- "eval_steps_per_second": 2.19,
877
- "eval_wer": 0.22099230207338316,
878
- "step": 5800
879
- },
880
- {
881
- "epoch": 48.36,
882
- "learning_rate": 9.524364232977851e-05,
883
- "loss": 0.0251,
884
- "step": 5900
885
- },
886
- {
887
- "epoch": 48.36,
888
- "eval_loss": 0.33351680636405945,
889
- "eval_runtime": 198.4757,
890
- "eval_samples_per_second": 17.554,
891
- "eval_steps_per_second": 2.197,
892
- "eval_wer": 0.22082373433724786,
893
- "step": 5900
894
- },
895
- {
896
- "epoch": 49.18,
897
- "learning_rate": 9.516160787530763e-05,
898
- "loss": 0.024,
899
- "step": 6000
900
- },
901
- {
902
- "epoch": 49.18,
903
- "eval_loss": 0.3367384672164917,
904
- "eval_runtime": 203.7806,
905
- "eval_samples_per_second": 17.097,
906
- "eval_steps_per_second": 2.14,
907
- "eval_wer": 0.2140810248918357,
908
- "step": 6000
909
- },
910
- {
911
- "epoch": 50.0,
912
- "learning_rate": 9.507957342083675e-05,
913
- "loss": 0.023,
914
- "step": 6100
915
- },
916
- {
917
- "epoch": 50.0,
918
- "eval_loss": 0.33463725447654724,
919
- "eval_runtime": 202.5214,
920
- "eval_samples_per_second": 17.203,
921
- "eval_steps_per_second": 2.153,
922
- "eval_wer": 0.2207675450918694,
923
- "step": 6100
924
- },
925
- {
926
- "epoch": 50.82,
927
- "learning_rate": 9.499753896636588e-05,
928
- "loss": 0.0237,
929
- "step": 6200
930
- },
931
- {
932
- "epoch": 50.82,
933
- "eval_loss": 0.3415164053440094,
934
- "eval_runtime": 199.1709,
935
- "eval_samples_per_second": 17.493,
936
- "eval_steps_per_second": 2.189,
937
- "eval_wer": 0.22290273641624994,
938
- "step": 6200
939
- },
940
- {
941
- "epoch": 51.64,
942
- "learning_rate": 9.4915504511895e-05,
943
- "loss": 0.0238,
944
- "step": 6300
945
- },
946
- {
947
- "epoch": 51.64,
948
- "eval_loss": 0.34561434388160706,
949
- "eval_runtime": 196.5595,
950
- "eval_samples_per_second": 17.725,
951
- "eval_steps_per_second": 2.218,
952
- "eval_wer": 0.21638478395235153,
953
- "step": 6300
954
- },
955
- {
956
- "epoch": 52.46,
957
- "learning_rate": 9.483347005742413e-05,
958
- "loss": 0.0242,
959
- "step": 6400
960
- },
961
- {
962
- "epoch": 52.46,
963
- "eval_loss": 0.34634143114089966,
964
- "eval_runtime": 202.0785,
965
- "eval_samples_per_second": 17.241,
966
- "eval_steps_per_second": 2.158,
967
- "eval_wer": 0.22666741585660505,
968
- "step": 6400
969
- },
970
- {
971
- "epoch": 53.28,
972
- "learning_rate": 9.475143560295325e-05,
973
- "loss": 0.0238,
974
- "step": 6500
975
- },
976
- {
977
- "epoch": 53.28,
978
- "eval_loss": 0.3443451225757599,
979
- "eval_runtime": 198.1661,
980
- "eval_samples_per_second": 17.581,
981
- "eval_steps_per_second": 2.2,
982
- "eval_wer": 0.21722762263302803,
983
- "step": 6500
984
- },
985
- {
986
- "epoch": 54.1,
987
- "learning_rate": 9.466940114848236e-05,
988
- "loss": 0.0247,
989
- "step": 6600
990
- },
991
- {
992
- "epoch": 54.1,
993
- "eval_loss": 0.33527693152427673,
994
- "eval_runtime": 202.0925,
995
- "eval_samples_per_second": 17.24,
996
- "eval_steps_per_second": 2.157,
997
- "eval_wer": 0.21941900320278698,
998
- "step": 6600
999
- },
1000
- {
1001
- "epoch": 54.92,
1002
- "learning_rate": 9.45873666940115e-05,
1003
- "loss": 0.024,
1004
- "step": 6700
1005
- },
1006
- {
1007
- "epoch": 54.92,
1008
- "eval_loss": 0.33691638708114624,
1009
- "eval_runtime": 203.115,
1010
- "eval_samples_per_second": 17.153,
1011
- "eval_steps_per_second": 2.147,
1012
- "eval_wer": 0.21481148508175535,
1013
- "step": 6700
1014
- },
1015
- {
1016
- "epoch": 55.74,
1017
- "learning_rate": 9.450533223954061e-05,
1018
- "loss": 0.023,
1019
- "step": 6800
1020
- },
1021
- {
1022
- "epoch": 55.74,
1023
- "eval_loss": 0.3394332826137543,
1024
- "eval_runtime": 205.3023,
1025
- "eval_samples_per_second": 16.97,
1026
- "eval_steps_per_second": 2.124,
1027
- "eval_wer": 0.21807046131370456,
1028
- "step": 6800
1029
- },
1030
- {
1031
- "epoch": 56.56,
1032
- "learning_rate": 9.442329778506973e-05,
1033
- "loss": 0.0226,
1034
- "step": 6900
1035
- },
1036
- {
1037
- "epoch": 56.56,
1038
- "eval_loss": 0.3425767123699188,
1039
- "eval_runtime": 204.2505,
1040
- "eval_samples_per_second": 17.057,
1041
- "eval_steps_per_second": 2.135,
1042
- "eval_wer": 0.21818283980446143,
1043
- "step": 6900
1044
- },
1045
- {
1046
- "epoch": 57.38,
1047
- "learning_rate": 9.434126333059886e-05,
1048
- "loss": 0.0233,
1049
- "step": 7000
1050
- },
1051
- {
1052
- "epoch": 57.38,
1053
- "eval_loss": 0.3361387252807617,
1054
- "eval_runtime": 204.131,
1055
- "eval_samples_per_second": 17.067,
1056
- "eval_steps_per_second": 2.136,
1057
- "eval_wer": 0.21981232792043603,
1058
- "step": 7000
1059
- },
1060
- {
1061
- "epoch": 58.2,
1062
- "learning_rate": 9.425922887612798e-05,
1063
- "loss": 0.0231,
1064
- "step": 7100
1065
- },
1066
- {
1067
- "epoch": 58.2,
1068
- "eval_loss": 0.3401791751384735,
1069
- "eval_runtime": 204.9298,
1070
- "eval_samples_per_second": 17.001,
1071
- "eval_steps_per_second": 2.128,
1072
- "eval_wer": 0.2207675450918694,
1073
- "step": 7100
1074
- },
1075
- {
1076
- "epoch": 59.02,
1077
- "learning_rate": 9.41780147662018e-05,
1078
- "loss": 0.0231,
1079
- "step": 7200
1080
- },
1081
- {
1082
- "epoch": 59.02,
1083
- "eval_loss": 0.32741424441337585,
1084
- "eval_runtime": 205.0196,
1085
- "eval_samples_per_second": 16.993,
1086
- "eval_steps_per_second": 2.127,
1087
- "eval_wer": 0.22295892566162837,
1088
- "step": 7200
1089
- },
1090
- {
1091
- "epoch": 59.84,
1092
- "learning_rate": 9.409598031173093e-05,
1093
- "loss": 0.0242,
1094
- "step": 7300
1095
- },
1096
- {
1097
- "epoch": 59.84,
1098
- "eval_loss": 0.3416125774383545,
1099
- "eval_runtime": 205.8312,
1100
- "eval_samples_per_second": 16.926,
1101
- "eval_steps_per_second": 2.118,
1102
- "eval_wer": 0.21773332584143396,
1103
- "step": 7300
1104
- },
1105
- {
1106
- "epoch": 60.66,
1107
- "learning_rate": 9.401394585726005e-05,
1108
- "loss": 0.0221,
1109
- "step": 7400
1110
- },
1111
- {
1112
- "epoch": 60.66,
1113
- "eval_loss": 0.3495969772338867,
1114
- "eval_runtime": 207.6167,
1115
- "eval_samples_per_second": 16.781,
1116
- "eval_steps_per_second": 2.1,
1117
- "eval_wer": 0.21874473225824578,
1118
- "step": 7400
1119
- },
1120
- {
1121
- "epoch": 61.48,
1122
- "learning_rate": 9.393191140278917e-05,
1123
- "loss": 0.0231,
1124
- "step": 7500
1125
- },
1126
- {
1127
- "epoch": 61.48,
1128
- "eval_loss": 0.33186405897140503,
1129
- "eval_runtime": 206.7658,
1130
- "eval_samples_per_second": 16.85,
1131
- "eval_steps_per_second": 2.109,
1132
- "eval_wer": 0.22155419452716751,
1133
- "step": 7500
1134
- },
1135
- {
1136
- "epoch": 62.3,
1137
- "learning_rate": 9.38498769483183e-05,
1138
- "loss": 0.021,
1139
- "step": 7600
1140
- },
1141
- {
1142
- "epoch": 62.3,
1143
- "eval_loss": 0.3377366065979004,
1144
- "eval_runtime": 203.3638,
1145
- "eval_samples_per_second": 17.132,
1146
- "eval_steps_per_second": 2.144,
1147
- "eval_wer": 0.21998089565657133,
1148
- "step": 7600
1149
- },
1150
- {
1151
- "epoch": 63.11,
1152
- "learning_rate": 9.376784249384743e-05,
1153
- "loss": 0.022,
1154
- "step": 7700
1155
- },
1156
- {
1157
- "epoch": 63.11,
1158
- "eval_loss": 0.3331567943096161,
1159
- "eval_runtime": 203.4814,
1160
- "eval_samples_per_second": 17.122,
1161
- "eval_steps_per_second": 2.143,
1162
- "eval_wer": 0.22127324830027534,
1163
- "step": 7700
1164
- },
1165
- {
1166
- "epoch": 63.93,
1167
- "learning_rate": 9.368580803937655e-05,
1168
- "loss": 0.0216,
1169
- "step": 7800
1170
- },
1171
- {
1172
- "epoch": 63.93,
1173
- "eval_loss": 0.32978036999702454,
1174
- "eval_runtime": 204.6963,
1175
- "eval_samples_per_second": 17.02,
1176
- "eval_steps_per_second": 2.13,
1177
- "eval_wer": 0.2230151149070068,
1178
- "step": 7800
1179
- },
1180
- {
1181
- "epoch": 64.75,
1182
- "learning_rate": 9.360377358490567e-05,
1183
- "loss": 0.021,
1184
- "step": 7900
1185
- },
1186
- {
1187
- "epoch": 64.75,
1188
- "eval_loss": 0.3439036011695862,
1189
- "eval_runtime": 203.7134,
1190
- "eval_samples_per_second": 17.102,
1191
- "eval_steps_per_second": 2.14,
1192
- "eval_wer": 0.22329606113389897,
1193
- "step": 7900
1194
- },
1195
- {
1196
- "epoch": 65.57,
1197
- "learning_rate": 9.352173913043478e-05,
1198
- "loss": 0.0217,
1199
- "step": 8000
1200
- },
1201
- {
1202
- "epoch": 65.57,
1203
- "eval_loss": 0.34148770570755005,
1204
- "eval_runtime": 202.6414,
1205
- "eval_samples_per_second": 17.193,
1206
- "eval_steps_per_second": 2.152,
1207
- "eval_wer": 0.21969994942967916,
1208
- "step": 8000
1209
- },
1210
- {
1211
- "epoch": 66.39,
1212
- "learning_rate": 9.343970467596391e-05,
1213
- "loss": 0.021,
1214
- "step": 8100
1215
- },
1216
- {
1217
- "epoch": 66.39,
1218
- "eval_loss": 0.3486896753311157,
1219
- "eval_runtime": 202.1874,
1220
- "eval_samples_per_second": 17.232,
1221
- "eval_steps_per_second": 2.156,
1222
- "eval_wer": 0.21969994942967916,
1223
- "step": 8100
1224
- },
1225
- {
1226
- "epoch": 67.21,
1227
- "learning_rate": 9.335767022149303e-05,
1228
- "loss": 0.0207,
1229
- "step": 8200
1230
- },
1231
- {
1232
- "epoch": 67.21,
1233
- "eval_loss": 0.3460260331630707,
1234
- "eval_runtime": 200.0049,
1235
- "eval_samples_per_second": 17.42,
1236
- "eval_steps_per_second": 2.18,
1237
- "eval_wer": 0.21795808282294768,
1238
- "step": 8200
1239
- },
1240
- {
1241
- "epoch": 68.03,
1242
- "learning_rate": 9.327563576702215e-05,
1243
- "loss": 0.0212,
1244
- "step": 8300
1245
- },
1246
- {
1247
- "epoch": 68.03,
1248
- "eval_loss": 0.35000941157341003,
1249
- "eval_runtime": 202.0373,
1250
- "eval_samples_per_second": 17.244,
1251
- "eval_steps_per_second": 2.158,
1252
- "eval_wer": 0.21627240546159465,
1253
- "step": 8300
1254
- },
1255
- {
1256
- "epoch": 68.85,
1257
- "learning_rate": 9.319360131255127e-05,
1258
- "loss": 0.0223,
1259
- "step": 8400
1260
- },
1261
- {
1262
- "epoch": 68.85,
1263
- "eval_loss": 0.33812931180000305,
1264
- "eval_runtime": 201.8642,
1265
- "eval_samples_per_second": 17.259,
1266
- "eval_steps_per_second": 2.16,
1267
- "eval_wer": 0.2200932741473282,
1268
- "step": 8400
1269
- },
1270
- {
1271
- "epoch": 69.67,
1272
- "learning_rate": 9.31123872026251e-05,
1273
- "loss": 0.0224,
1274
- "step": 8500
1275
- },
1276
- {
1277
- "epoch": 69.67,
1278
- "eval_loss": 0.3443692624568939,
1279
- "eval_runtime": 202.0712,
1280
- "eval_samples_per_second": 17.241,
1281
- "eval_steps_per_second": 2.158,
1282
- "eval_wer": 0.21936281395740856,
1283
- "step": 8500
1284
- },
1285
- {
1286
- "epoch": 70.49,
1287
- "learning_rate": 9.303035274815422e-05,
1288
- "loss": 0.0229,
1289
- "step": 8600
1290
- },
1291
- {
1292
- "epoch": 70.49,
1293
- "eval_loss": 0.3441421389579773,
1294
- "eval_runtime": 203.724,
1295
- "eval_samples_per_second": 17.102,
1296
- "eval_steps_per_second": 2.14,
1297
- "eval_wer": 0.2227341686801146,
1298
- "step": 8600
1299
- },
1300
- {
1301
- "epoch": 71.31,
1302
- "learning_rate": 9.294831829368335e-05,
1303
- "loss": 0.0212,
1304
- "step": 8700
1305
- },
1306
- {
1307
- "epoch": 71.31,
1308
- "eval_loss": 0.3485627770423889,
1309
- "eval_runtime": 202.8979,
1310
- "eval_samples_per_second": 17.171,
1311
- "eval_steps_per_second": 2.149,
1312
- "eval_wer": 0.2218913299994381,
1313
- "step": 8700
1314
- },
1315
- {
1316
- "epoch": 72.13,
1317
- "learning_rate": 9.286628383921247e-05,
1318
- "loss": 0.0221,
1319
- "step": 8800
1320
- },
1321
- {
1322
- "epoch": 72.13,
1323
- "eval_loss": 0.33275967836380005,
1324
- "eval_runtime": 204.5042,
1325
- "eval_samples_per_second": 17.036,
1326
- "eval_steps_per_second": 2.132,
1327
- "eval_wer": 0.22116086980951846,
1328
- "step": 8800
1329
- },
1330
- {
1331
- "epoch": 72.95,
1332
- "learning_rate": 9.27842493847416e-05,
1333
- "loss": 0.02,
1334
- "step": 8900
1335
- },
1336
- {
1337
- "epoch": 72.95,
1338
- "eval_loss": 0.3454422950744629,
1339
- "eval_runtime": 203.7032,
1340
- "eval_samples_per_second": 17.103,
1341
- "eval_steps_per_second": 2.14,
1342
- "eval_wer": 0.2212170590548969,
1343
- "step": 8900
1344
- },
1345
- {
1346
- "epoch": 73.77,
1347
- "learning_rate": 9.270221493027072e-05,
1348
- "loss": 0.0202,
1349
- "step": 9000
1350
- },
1351
- {
1352
- "epoch": 73.77,
1353
- "eval_loss": 0.36007246375083923,
1354
- "eval_runtime": 200.6262,
1355
- "eval_samples_per_second": 17.366,
1356
- "eval_steps_per_second": 2.173,
1357
- "eval_wer": 0.21677810867000055,
1358
- "step": 9000
1359
- },
1360
- {
1361
- "epoch": 74.59,
1362
- "learning_rate": 9.262018047579985e-05,
1363
- "loss": 0.021,
1364
- "step": 9100
1365
- },
1366
- {
1367
- "epoch": 74.59,
1368
- "eval_loss": 0.34157735109329224,
1369
- "eval_runtime": 204.0017,
1370
- "eval_samples_per_second": 17.078,
1371
- "eval_steps_per_second": 2.137,
1372
- "eval_wer": 0.2205427881103557,
1373
- "step": 9100
1374
- },
1375
- {
1376
- "epoch": 75.41,
1377
- "learning_rate": 9.253814602132897e-05,
1378
- "loss": 0.0217,
1379
- "step": 9200
1380
- },
1381
- {
1382
- "epoch": 75.41,
1383
- "eval_loss": 0.35333356261253357,
1384
- "eval_runtime": 207.6876,
1385
- "eval_samples_per_second": 16.775,
1386
- "eval_steps_per_second": 2.099,
1387
- "eval_wer": 0.2230151149070068,
1388
- "step": 9200
1389
- },
1390
- {
1391
- "epoch": 76.23,
1392
- "learning_rate": 9.245611156685808e-05,
1393
- "loss": 0.0209,
1394
- "step": 9300
1395
- },
1396
- {
1397
- "epoch": 76.23,
1398
- "eval_loss": 0.35574042797088623,
1399
- "eval_runtime": 204.8643,
1400
- "eval_samples_per_second": 17.006,
1401
- "eval_steps_per_second": 2.128,
1402
- "eval_wer": 0.22020565263808506,
1403
- "step": 9300
1404
- },
1405
- {
1406
- "epoch": 77.05,
1407
- "learning_rate": 9.23740771123872e-05,
1408
- "loss": 0.0214,
1409
- "step": 9400
1410
- },
1411
- {
1412
- "epoch": 77.05,
1413
- "eval_loss": 0.3214012086391449,
1414
- "eval_runtime": 203.9509,
1415
- "eval_samples_per_second": 17.083,
1416
- "eval_steps_per_second": 2.138,
1417
- "eval_wer": 0.21851997527673203,
1418
- "step": 9400
1419
- },
1420
- {
1421
- "epoch": 77.87,
1422
- "learning_rate": 9.229204265791633e-05,
1423
- "loss": 0.0212,
1424
- "step": 9500
1425
- },
1426
- {
1427
- "epoch": 77.87,
1428
- "eval_loss": 0.33761337399482727,
1429
- "eval_runtime": 205.1801,
1430
- "eval_samples_per_second": 16.98,
1431
- "eval_steps_per_second": 2.125,
1432
- "eval_wer": 0.21745237961454178,
1433
- "step": 9500
1434
- },
1435
- {
1436
- "epoch": 78.69,
1437
- "learning_rate": 9.221000820344545e-05,
1438
- "loss": 0.0214,
1439
- "step": 9600
1440
- },
1441
- {
1442
- "epoch": 78.69,
1443
- "eval_loss": 0.32581719756126404,
1444
- "eval_runtime": 204.8134,
1445
- "eval_samples_per_second": 17.011,
1446
- "eval_steps_per_second": 2.129,
1447
- "eval_wer": 0.21722762263302803,
1448
- "step": 9600
1449
- },
1450
- {
1451
- "epoch": 79.51,
1452
- "learning_rate": 9.212797374897457e-05,
1453
- "loss": 0.022,
1454
- "step": 9700
1455
- },
1456
- {
1457
- "epoch": 79.51,
1458
- "eval_loss": 0.3441867530345917,
1459
- "eval_runtime": 207.7734,
1460
- "eval_samples_per_second": 16.768,
1461
- "eval_steps_per_second": 2.098,
1462
- "eval_wer": 0.2220598977355734,
1463
- "step": 9700
1464
- },
1465
- {
1466
- "epoch": 80.33,
1467
- "learning_rate": 9.204593929450369e-05,
1468
- "loss": 0.0236,
1469
- "step": 9800
1470
- },
1471
- {
1472
- "epoch": 80.33,
1473
- "eval_loss": 0.33785006403923035,
1474
- "eval_runtime": 206.8523,
1475
- "eval_samples_per_second": 16.843,
1476
- "eval_steps_per_second": 2.108,
1477
- "eval_wer": 0.22458841377760297,
1478
- "step": 9800
1479
- },
1480
- {
1481
- "epoch": 81.15,
1482
- "learning_rate": 9.196390484003282e-05,
1483
- "loss": 0.0231,
1484
- "step": 9900
1485
- },
1486
- {
1487
- "epoch": 81.15,
1488
- "eval_loss": 0.33509162068367004,
1489
- "eval_runtime": 205.9688,
1490
- "eval_samples_per_second": 16.915,
1491
- "eval_steps_per_second": 2.117,
1492
- "eval_wer": 0.2182952182952183,
1493
- "step": 9900
1494
- },
1495
- {
1496
- "epoch": 81.97,
1497
- "learning_rate": 9.188187038556195e-05,
1498
- "loss": 0.0206,
1499
- "step": 10000
1500
- },
1501
- {
1502
- "epoch": 81.97,
1503
- "eval_loss": 0.331687331199646,
1504
- "eval_runtime": 207.4393,
1505
- "eval_samples_per_second": 16.795,
1506
- "eval_steps_per_second": 2.102,
1507
- "eval_wer": 0.21644097319772995,
1508
- "step": 10000
1509
- },
1510
- {
1511
- "epoch": 82.79,
1512
- "learning_rate": 9.179983593109106e-05,
1513
- "loss": 0.0199,
1514
- "step": 10100
1515
- },
1516
- {
1517
- "epoch": 82.79,
1518
- "eval_loss": 0.3436143398284912,
1519
- "eval_runtime": 207.0841,
1520
- "eval_samples_per_second": 16.824,
1521
- "eval_steps_per_second": 2.105,
1522
- "eval_wer": 0.2175085688599202,
1523
- "step": 10100
1524
- },
1525
- {
1526
- "epoch": 83.61,
1527
- "learning_rate": 9.171780147662018e-05,
1528
- "loss": 0.0201,
1529
- "step": 10200
1530
- },
1531
- {
1532
- "epoch": 83.61,
1533
- "eval_loss": 0.3532947599887848,
1534
- "eval_runtime": 206.5219,
1535
- "eval_samples_per_second": 16.87,
1536
- "eval_steps_per_second": 2.111,
1537
- "eval_wer": 0.21689048716075743,
1538
- "step": 10200
1539
- },
1540
- {
1541
- "epoch": 84.43,
1542
- "learning_rate": 9.163576702214931e-05,
1543
- "loss": 0.0194,
1544
- "step": 10300
1545
- },
1546
- {
1547
- "epoch": 84.43,
1548
- "eval_loss": 0.3359845280647278,
1549
- "eval_runtime": 206.1506,
1550
- "eval_samples_per_second": 16.9,
1551
- "eval_steps_per_second": 2.115,
1552
- "eval_wer": 0.2166657301792437,
1553
- "step": 10300
1554
- },
1555
- {
1556
- "epoch": 85.25,
1557
- "learning_rate": 9.155373256767843e-05,
1558
- "loss": 0.0195,
1559
- "step": 10400
1560
- },
1561
- {
1562
- "epoch": 85.25,
1563
- "eval_loss": 0.34440097212791443,
1564
- "eval_runtime": 201.6413,
1565
- "eval_samples_per_second": 17.278,
1566
- "eval_steps_per_second": 2.162,
1567
- "eval_wer": 0.21773332584143396,
1568
- "step": 10400
1569
- },
1570
- {
1571
- "epoch": 86.07,
1572
- "learning_rate": 9.147169811320755e-05,
1573
- "loss": 0.0214,
1574
- "step": 10500
1575
- },
1576
- {
1577
- "epoch": 86.07,
1578
- "eval_loss": 0.355741411447525,
1579
- "eval_runtime": 201.504,
1580
- "eval_samples_per_second": 17.29,
1581
- "eval_steps_per_second": 2.164,
1582
- "eval_wer": 0.21756475810529866,
1583
- "step": 10500
1584
- },
1585
- {
1586
- "epoch": 86.89,
1587
- "learning_rate": 9.138966365873666e-05,
1588
- "loss": 0.0201,
1589
- "step": 10600
1590
- },
1591
- {
1592
- "epoch": 86.89,
1593
- "eval_loss": 0.3295002281665802,
1594
- "eval_runtime": 202.835,
1595
- "eval_samples_per_second": 17.177,
1596
- "eval_steps_per_second": 2.15,
1597
- "eval_wer": 0.21812665055908298,
1598
- "step": 10600
1599
- },
1600
- {
1601
- "epoch": 87.7,
1602
- "learning_rate": 9.13076292042658e-05,
1603
- "loss": 0.0201,
1604
- "step": 10700
1605
- },
1606
- {
1607
- "epoch": 87.7,
1608
- "eval_loss": 0.32654306292533875,
1609
- "eval_runtime": 204.4319,
1610
- "eval_samples_per_second": 17.042,
1611
- "eval_steps_per_second": 2.133,
1612
- "eval_wer": 0.21593526998932405,
1613
- "step": 10700
1614
- },
1615
- {
1616
- "epoch": 88.52,
1617
- "learning_rate": 9.122559474979491e-05,
1618
- "loss": 0.0205,
1619
- "step": 10800
1620
- },
1621
- {
1622
- "epoch": 88.52,
1623
- "eval_loss": 0.35505411028862,
1624
- "eval_runtime": 200.1439,
1625
- "eval_samples_per_second": 17.407,
1626
- "eval_steps_per_second": 2.178,
1627
- "eval_wer": 0.22374557509692644,
1628
- "step": 10800
1629
- },
1630
- {
1631
- "epoch": 89.34,
1632
- "learning_rate": 9.114356029532404e-05,
1633
- "loss": 0.0207,
1634
- "step": 10900
1635
- },
1636
- {
1637
- "epoch": 89.34,
1638
- "eval_loss": 0.32287225127220154,
1639
- "eval_runtime": 202.1756,
1640
- "eval_samples_per_second": 17.233,
1641
- "eval_steps_per_second": 2.157,
1642
- "eval_wer": 0.22144181603641064,
1643
- "step": 10900
1644
- },
1645
- {
1646
- "epoch": 90.16,
1647
- "learning_rate": 9.106152584085316e-05,
1648
- "loss": 0.0196,
1649
- "step": 11000
1650
- },
1651
- {
1652
- "epoch": 90.16,
1653
- "eval_loss": 0.3513171374797821,
1654
- "eval_runtime": 200.3912,
1655
- "eval_samples_per_second": 17.386,
1656
- "eval_steps_per_second": 2.176,
1657
- "eval_wer": 0.2175085688599202,
1658
- "step": 11000
1659
- },
1660
- {
1661
- "epoch": 90.98,
1662
- "learning_rate": 9.097949138638229e-05,
1663
- "loss": 0.0192,
1664
- "step": 11100
1665
- },
1666
- {
1667
- "epoch": 90.98,
1668
- "eval_loss": 0.35852402448654175,
1669
- "eval_runtime": 203.5945,
1670
- "eval_samples_per_second": 17.112,
1671
- "eval_steps_per_second": 2.142,
1672
- "eval_wer": 0.21998089565657133,
1673
- "step": 11100
1674
- },
1675
- {
1676
- "epoch": 91.8,
1677
- "learning_rate": 9.089745693191141e-05,
1678
- "loss": 0.0199,
1679
- "step": 11200
1680
- },
1681
- {
1682
- "epoch": 91.8,
1683
- "eval_loss": 0.3458695411682129,
1684
- "eval_runtime": 186.9089,
1685
- "eval_samples_per_second": 18.64,
1686
- "eval_steps_per_second": 2.333,
1687
- "eval_wer": 0.22127324830027534,
1688
- "step": 11200
1689
- },
1690
- {
1691
- "epoch": 92.62,
1692
- "learning_rate": 9.081542247744053e-05,
1693
- "loss": 0.0191,
1694
- "step": 11300
1695
- },
1696
- {
1697
- "epoch": 92.62,
1698
- "eval_loss": 0.3403360843658447,
1699
- "eval_runtime": 186.3002,
1700
- "eval_samples_per_second": 18.701,
1701
- "eval_steps_per_second": 2.34,
1702
- "eval_wer": 0.22172276226330281,
1703
- "step": 11300
1704
- },
1705
- {
1706
- "epoch": 93.44,
1707
- "learning_rate": 9.073338802296966e-05,
1708
- "loss": 0.019,
1709
- "step": 11400
1710
- },
1711
- {
1712
- "epoch": 93.44,
1713
- "eval_loss": 0.34108027815818787,
1714
- "eval_runtime": 184.4375,
1715
- "eval_samples_per_second": 18.89,
1716
- "eval_steps_per_second": 2.364,
1717
- "eval_wer": 0.21486767432713377,
1718
- "step": 11400
1719
- },
1720
- {
1721
- "epoch": 94.26,
1722
- "learning_rate": 9.065135356849878e-05,
1723
- "loss": 0.0197,
1724
- "step": 11500
1725
- },
1726
- {
1727
- "epoch": 94.26,
1728
- "eval_loss": 0.3422238528728485,
1729
- "eval_runtime": 191.25,
1730
- "eval_samples_per_second": 18.217,
1731
- "eval_steps_per_second": 2.28,
1732
- "eval_wer": 0.22082373433724786,
1733
- "step": 11500
1734
- },
1735
- {
1736
- "epoch": 95.08,
1737
- "learning_rate": 9.056931911402789e-05,
1738
- "loss": 0.0197,
1739
- "step": 11600
1740
- },
1741
- {
1742
- "epoch": 95.08,
1743
- "eval_loss": 0.33903756737709045,
1744
- "eval_runtime": 208.792,
1745
- "eval_samples_per_second": 16.686,
1746
- "eval_steps_per_second": 2.088,
1747
- "eval_wer": 0.21986851716581446,
1748
- "step": 11600
1749
- },
1750
- {
1751
- "epoch": 95.9,
1752
- "learning_rate": 9.048728465955701e-05,
1753
- "loss": 0.0207,
1754
- "step": 11700
1755
- },
1756
- {
1757
- "epoch": 95.9,
1758
- "eval_loss": 0.322264164686203,
1759
- "eval_runtime": 207.201,
1760
- "eval_samples_per_second": 16.815,
1761
- "eval_steps_per_second": 2.104,
1762
- "eval_wer": 0.21351913243805135,
1763
- "step": 11700
1764
  }
1765
  ],
1766
- "max_steps": 122000,
1767
  "num_train_epochs": 1000,
1768
- "total_flos": 2.640848798488764e+20,
1769
  "trial_name": null,
1770
  "trial_params": null
1771
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 31.99304865938431,
5
+ "global_step": 4000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.79,
12
  "learning_rate": 0.0001,
13
+ "loss": 0.0392,
14
  "step": 100
15
  },
16
  {
17
+ "epoch": 0.79,
18
+ "eval_loss": 0.23765751719474792,
19
+ "eval_runtime": 220.5891,
20
+ "eval_samples_per_second": 16.297,
21
+ "eval_steps_per_second": 2.04,
22
+ "eval_wer": 0.21887876816308827,
23
  "step": 100
24
  },
25
  {
26
+ "epoch": 1.6,
27
+ "learning_rate": 9.991993594875901e-05,
28
+ "loss": 0.0336,
29
  "step": 200
30
  },
31
  {
32
+ "epoch": 1.6,
33
+ "eval_loss": 0.26165521144866943,
34
+ "eval_runtime": 157.9726,
35
+ "eval_samples_per_second": 22.757,
36
+ "eval_steps_per_second": 2.849,
37
+ "eval_wer": 0.21649316851008457,
38
  "step": 200
39
  },
40
  {
41
+ "epoch": 2.4,
42
+ "learning_rate": 9.983987189751803e-05,
43
+ "loss": 0.0293,
44
  "step": 300
45
  },
46
  {
47
+ "epoch": 2.4,
48
+ "eval_loss": 0.28323182463645935,
49
+ "eval_runtime": 169.3448,
50
+ "eval_samples_per_second": 21.229,
51
+ "eval_steps_per_second": 2.657,
52
+ "eval_wer": 0.2197462589459987,
53
  "step": 300
54
  },
55
  {
56
+ "epoch": 3.2,
57
+ "learning_rate": 9.975980784627703e-05,
58
+ "loss": 0.0283,
59
  "step": 400
60
  },
61
  {
62
+ "epoch": 3.2,
63
+ "eval_loss": 0.29311421513557434,
64
+ "eval_runtime": 175.6178,
65
+ "eval_samples_per_second": 20.471,
66
+ "eval_steps_per_second": 2.562,
67
+ "eval_wer": 0.21508349598785512,
68
  "step": 400
69
  },
70
  {
71
+ "epoch": 3.99,
72
+ "learning_rate": 9.967974379503603e-05,
73
+ "loss": 0.0274,
74
  "step": 500
75
  },
76
  {
77
+ "epoch": 3.99,
78
+ "eval_loss": 0.30566948652267456,
79
+ "eval_runtime": 182.1945,
80
+ "eval_samples_per_second": 19.732,
81
+ "eval_steps_per_second": 2.47,
82
+ "eval_wer": 0.21828236824983735,
83
  "step": 500
84
  },
85
  {
86
+ "epoch": 4.79,
87
+ "learning_rate": 9.959967974379504e-05,
88
+ "loss": 0.0264,
89
  "step": 600
90
  },
91
  {
92
+ "epoch": 4.79,
93
+ "eval_loss": 0.30197781324386597,
94
+ "eval_runtime": 174.7969,
95
+ "eval_samples_per_second": 20.567,
96
+ "eval_steps_per_second": 2.574,
97
+ "eval_wer": 0.2171437865972674,
98
  "step": 600
99
  },
100
  {
101
+ "epoch": 5.6,
102
+ "learning_rate": 9.951961569255405e-05,
103
+ "loss": 0.0259,
104
  "step": 700
105
  },
106
  {
107
+ "epoch": 5.6,
108
+ "eval_loss": 0.3002428412437439,
109
+ "eval_runtime": 191.2838,
110
+ "eval_samples_per_second": 18.794,
111
+ "eval_steps_per_second": 2.353,
112
+ "eval_wer": 0.21725222294513122,
113
  "step": 700
114
  },
115
  {
116
+ "epoch": 6.4,
117
+ "learning_rate": 9.943955164131305e-05,
118
+ "loss": 0.0254,
119
  "step": 800
120
  },
121
  {
122
+ "epoch": 6.4,
123
+ "eval_loss": 0.3097267746925354,
124
+ "eval_runtime": 183.9687,
125
+ "eval_samples_per_second": 19.541,
126
+ "eval_steps_per_second": 2.446,
127
+ "eval_wer": 0.21752331381479073,
128
  "step": 800
129
  },
130
  {
131
+ "epoch": 7.2,
132
+ "learning_rate": 9.936028823058447e-05,
133
+ "loss": 0.0252,
134
  "step": 900
135
  },
136
  {
137
+ "epoch": 7.2,
138
+ "eval_loss": 0.2970833480358124,
139
+ "eval_runtime": 182.1563,
140
+ "eval_samples_per_second": 19.736,
141
+ "eval_steps_per_second": 2.47,
142
+ "eval_wer": 0.2170353502494036,
143
  "step": 900
144
  },
145
  {
146
+ "epoch": 7.99,
147
+ "learning_rate": 9.928022417934349e-05,
148
+ "loss": 0.0234,
149
  "step": 1000
150
  },
151
  {
152
+ "epoch": 7.99,
153
+ "eval_loss": 0.31429246068000793,
154
+ "eval_runtime": 193.7999,
155
+ "eval_samples_per_second": 18.55,
156
+ "eval_steps_per_second": 2.322,
157
+ "eval_wer": 0.21405335068314899,
158
  "step": 1000
159
  },
160
  {
161
+ "epoch": 8.79,
162
+ "learning_rate": 9.920016012810248e-05,
163
+ "loss": 0.0228,
164
  "step": 1100
165
  },
166
  {
167
+ "epoch": 8.79,
168
+ "eval_loss": 0.32985326647758484,
169
+ "eval_runtime": 194.3544,
170
+ "eval_samples_per_second": 18.497,
171
+ "eval_steps_per_second": 2.315,
172
+ "eval_wer": 0.2142702233788766,
173
  "step": 1100
174
  },
175
  {
176
+ "epoch": 9.6,
177
+ "learning_rate": 9.912009607686149e-05,
178
+ "loss": 0.0236,
179
  "step": 1200
180
  },
181
  {
182
+ "epoch": 9.6,
183
+ "eval_loss": 0.3165593147277832,
184
+ "eval_runtime": 185.0001,
185
+ "eval_samples_per_second": 19.432,
186
+ "eval_steps_per_second": 2.432,
187
+ "eval_wer": 0.21833658642376924,
188
  "step": 1200
189
  },
190
  {
191
+ "epoch": 10.4,
192
+ "learning_rate": 9.90400320256205e-05,
193
+ "loss": 0.0241,
194
  "step": 1300
195
  },
196
  {
197
+ "epoch": 10.4,
198
+ "eval_loss": 0.3285478949546814,
199
+ "eval_runtime": 181.9532,
200
+ "eval_samples_per_second": 19.758,
201
+ "eval_steps_per_second": 2.473,
202
+ "eval_wer": 0.2192582953806116,
203
  "step": 1300
204
  },
205
  {
206
+ "epoch": 11.2,
207
+ "learning_rate": 9.895996797437951e-05,
208
+ "loss": 0.0243,
209
  "step": 1400
210
  },
211
  {
212
+ "epoch": 11.2,
213
+ "eval_loss": 0.3187803626060486,
214
+ "eval_runtime": 200.4409,
215
+ "eval_samples_per_second": 17.935,
216
+ "eval_steps_per_second": 2.245,
217
+ "eval_wer": 0.22099327694643245,
218
  "step": 1400
219
  },
220
  {
221
+ "epoch": 11.99,
222
+ "learning_rate": 9.887990392313852e-05,
223
+ "loss": 0.026,
224
  "step": 1500
225
  },
226
  {
227
+ "epoch": 11.99,
228
+ "eval_loss": 0.32988375425338745,
229
+ "eval_runtime": 199.5224,
230
+ "eval_samples_per_second": 18.018,
231
+ "eval_steps_per_second": 2.255,
232
+ "eval_wer": 0.22375840381695944,
233
  "step": 1500
234
  },
235
  {
236
+ "epoch": 12.79,
237
+ "learning_rate": 9.879983987189752e-05,
238
+ "loss": 0.0259,
239
  "step": 1600
240
  },
241
  {
242
+ "epoch": 12.79,
243
+ "eval_loss": 0.3099309206008911,
244
+ "eval_runtime": 198.0592,
245
+ "eval_samples_per_second": 18.151,
246
+ "eval_steps_per_second": 2.272,
247
+ "eval_wer": 0.22045109520711342,
248
  "step": 1600
249
  },
250
  {
251
+ "epoch": 13.6,
252
+ "learning_rate": 9.871977582065654e-05,
253
+ "loss": 0.0255,
254
  "step": 1700
255
  },
256
  {
257
+ "epoch": 13.6,
258
+ "eval_loss": 0.30543622374534607,
259
+ "eval_runtime": 202.823,
260
+ "eval_samples_per_second": 17.725,
261
+ "eval_steps_per_second": 2.219,
262
+ "eval_wer": 0.22402949468661895,
263
  "step": 1700
264
  },
265
  {
266
+ "epoch": 14.4,
267
+ "learning_rate": 9.863971176941553e-05,
268
+ "loss": 0.0253,
269
  "step": 1800
270
  },
271
  {
272
+ "epoch": 14.4,
273
+ "eval_loss": 0.31324318051338196,
274
+ "eval_runtime": 199.664,
275
+ "eval_samples_per_second": 18.005,
276
+ "eval_steps_per_second": 2.254,
277
+ "eval_wer": 0.21779440468445022,
278
  "step": 1800
279
  },
280
  {
281
+ "epoch": 15.2,
282
+ "learning_rate": 9.855964771817454e-05,
283
+ "loss": 0.0244,
284
  "step": 1900
285
  },
286
  {
287
+ "epoch": 15.2,
288
+ "eval_loss": 0.33187857270240784,
289
+ "eval_runtime": 197.0859,
290
+ "eval_samples_per_second": 18.241,
291
+ "eval_steps_per_second": 2.283,
292
+ "eval_wer": 0.22121014964216004,
293
  "step": 1900
294
  },
295
  {
296
+ "epoch": 15.99,
297
+ "learning_rate": 9.847958366693355e-05,
298
+ "loss": 0.0231,
299
  "step": 2000
300
  },
301
  {
302
+ "epoch": 15.99,
303
+ "eval_loss": 0.33831512928009033,
304
+ "eval_runtime": 200.252,
305
+ "eval_samples_per_second": 17.952,
306
+ "eval_steps_per_second": 2.247,
307
+ "eval_wer": 0.21920407720667967,
308
  "step": 2000
309
  },
310
  {
311
+ "epoch": 16.79,
312
+ "learning_rate": 9.839951961569256e-05,
313
+ "loss": 0.0235,
314
  "step": 2100
315
  },
316
  {
317
+ "epoch": 16.79,
318
+ "eval_loss": 0.31389620900154114,
319
+ "eval_runtime": 198.2191,
320
+ "eval_samples_per_second": 18.136,
321
+ "eval_steps_per_second": 2.27,
322
+ "eval_wer": 0.21622207764042506,
323
  "step": 2100
324
  },
325
  {
326
+ "epoch": 17.6,
327
+ "learning_rate": 9.831945556445156e-05,
328
+ "loss": 0.0227,
329
  "step": 2200
330
  },
331
  {
332
+ "epoch": 17.6,
333
+ "eval_loss": 0.32037118077278137,
334
+ "eval_runtime": 202.9198,
335
+ "eval_samples_per_second": 17.716,
336
  "eval_steps_per_second": 2.218,
337
+ "eval_wer": 0.217360659292995,
338
  "step": 2200
339
  },
340
  {
341
+ "epoch": 18.4,
342
+ "learning_rate": 9.823939151321058e-05,
343
+ "loss": 0.0228,
344
  "step": 2300
345
  },
346
  {
347
+ "epoch": 18.4,
348
+ "eval_loss": 0.32169201970100403,
349
+ "eval_runtime": 193.9,
350
+ "eval_samples_per_second": 18.54,
351
+ "eval_steps_per_second": 2.321,
352
+ "eval_wer": 0.21757753198872262,
353
  "step": 2300
354
  },
355
  {
356
+ "epoch": 19.2,
357
+ "learning_rate": 9.815932746196959e-05,
358
+ "loss": 0.0217,
359
  "step": 2400
360
  },
361
  {
362
+ "epoch": 19.2,
363
+ "eval_loss": 0.3112569749355316,
364
+ "eval_runtime": 198.2135,
365
+ "eval_samples_per_second": 18.137,
366
+ "eval_steps_per_second": 2.27,
367
+ "eval_wer": 0.2170895684233355,
368
  "step": 2400
369
  },
370
  {
371
+ "epoch": 19.99,
372
+ "learning_rate": 9.807926341072858e-05,
373
+ "loss": 0.0212,
374
  "step": 2500
375
  },
376
  {
377
+ "epoch": 19.99,
378
+ "eval_loss": 0.31596991419792175,
379
+ "eval_runtime": 199.2987,
380
+ "eval_samples_per_second": 18.038,
381
+ "eval_steps_per_second": 2.258,
382
+ "eval_wer": 0.21351116894382996,
383
  "step": 2500
384
  },
385
  {
386
+ "epoch": 20.79,
387
+ "learning_rate": 9.79991993594876e-05,
388
+ "loss": 0.0216,
389
  "step": 2600
390
  },
391
  {
392
+ "epoch": 20.79,
393
+ "eval_loss": 0.3226161003112793,
394
+ "eval_runtime": 199.747,
395
+ "eval_samples_per_second": 17.998,
396
+ "eval_steps_per_second": 2.253,
397
+ "eval_wer": 0.21378225981348947,
398
  "step": 2600
399
  },
400
  {
401
+ "epoch": 21.6,
402
+ "learning_rate": 9.79191353082466e-05,
403
+ "loss": 0.0242,
404
  "step": 2700
405
  },
406
  {
407
+ "epoch": 21.6,
408
+ "eval_loss": 0.3281223177909851,
409
+ "eval_runtime": 197.1976,
410
+ "eval_samples_per_second": 18.23,
411
+ "eval_steps_per_second": 2.282,
412
+ "eval_wer": 0.2169269139015398,
413
  "step": 2700
414
  },
415
  {
416
+ "epoch": 22.4,
417
+ "learning_rate": 9.783907125700561e-05,
418
+ "loss": 0.0245,
419
  "step": 2800
420
  },
421
  {
422
+ "epoch": 22.4,
423
+ "eval_loss": 0.3078162968158722,
424
+ "eval_runtime": 198.6659,
425
+ "eval_samples_per_second": 18.096,
426
+ "eval_steps_per_second": 2.265,
427
+ "eval_wer": 0.2176317501626545,
428
  "step": 2800
429
  },
430
  {
431
+ "epoch": 23.2,
432
+ "learning_rate": 9.775900720576461e-05,
433
+ "loss": 0.0229,
434
  "step": 2900
435
  },
436
  {
437
+ "epoch": 23.2,
438
+ "eval_loss": 0.31995320320129395,
439
+ "eval_runtime": 204.328,
440
+ "eval_samples_per_second": 17.594,
441
+ "eval_steps_per_second": 2.202,
442
+ "eval_wer": 0.22045109520711342,
443
  "step": 2900
444
  },
445
  {
446
+ "epoch": 23.99,
447
+ "learning_rate": 9.767894315452363e-05,
448
+ "loss": 0.0226,
449
  "step": 3000
450
  },
451
  {
452
+ "epoch": 23.99,
453
+ "eval_loss": 0.33075791597366333,
454
+ "eval_runtime": 205.0698,
455
+ "eval_samples_per_second": 17.531,
456
+ "eval_steps_per_second": 2.194,
457
+ "eval_wer": 0.2171437865972674,
458
  "step": 3000
459
  },
460
  {
461
+ "epoch": 24.79,
462
+ "learning_rate": 9.759887910328262e-05,
463
+ "loss": 0.0213,
464
  "step": 3100
465
  },
466
  {
467
+ "epoch": 24.79,
468
+ "eval_loss": 0.3419627845287323,
469
+ "eval_runtime": 206.2312,
470
+ "eval_samples_per_second": 17.432,
471
+ "eval_steps_per_second": 2.182,
472
+ "eval_wer": 0.21936673172847537,
473
  "step": 3100
474
  },
475
  {
476
+ "epoch": 25.6,
477
+ "learning_rate": 9.751881505204163e-05,
478
+ "loss": 0.0212,
479
  "step": 3200
480
  },
481
  {
482
+ "epoch": 25.6,
483
+ "eval_loss": 0.3389272093772888,
484
+ "eval_runtime": 205.5665,
485
+ "eval_samples_per_second": 17.488,
486
+ "eval_steps_per_second": 2.189,
487
+ "eval_wer": 0.21519193233571893,
488
  "step": 3200
489
  },
490
  {
491
+ "epoch": 26.4,
492
+ "learning_rate": 9.743875100080065e-05,
493
+ "loss": 0.0208,
494
  "step": 3300
495
  },
496
  {
497
+ "epoch": 26.4,
498
+ "eval_loss": 0.32781022787094116,
499
+ "eval_runtime": 208.0862,
500
+ "eval_samples_per_second": 17.276,
501
+ "eval_steps_per_second": 2.163,
502
+ "eval_wer": 0.21275211450878334,
503
  "step": 3300
504
  },
505
  {
506
+ "epoch": 27.2,
507
+ "learning_rate": 9.735868694955965e-05,
508
+ "loss": 0.0212,
509
  "step": 3400
510
  },
511
  {
512
+ "epoch": 27.2,
513
+ "eval_loss": 0.3169388771057129,
514
+ "eval_runtime": 206.029,
515
+ "eval_samples_per_second": 17.449,
516
+ "eval_steps_per_second": 2.184,
517
+ "eval_wer": 0.21497505963999133,
518
  "step": 3400
519
  },
520
  {
521
+ "epoch": 27.99,
522
+ "learning_rate": 9.727862289831866e-05,
523
+ "loss": 0.0213,
524
  "step": 3500
525
  },
526
  {
527
+ "epoch": 27.99,
528
+ "eval_loss": 0.31786179542541504,
529
+ "eval_runtime": 198.632,
530
+ "eval_samples_per_second": 18.099,
531
+ "eval_steps_per_second": 2.265,
532
+ "eval_wer": 0.21757753198872262,
533
  "step": 3500
534
  },
535
  {
536
+ "epoch": 28.79,
537
+ "learning_rate": 9.719855884707766e-05,
538
+ "loss": 0.0213,
539
  "step": 3600
540
  },
541
  {
542
+ "epoch": 28.79,
543
+ "eval_loss": 0.32053136825561523,
544
+ "eval_runtime": 201.1109,
545
+ "eval_samples_per_second": 17.876,
546
+ "eval_steps_per_second": 2.238,
547
+ "eval_wer": 0.2203426588592496,
548
  "step": 3600
549
  },
550
  {
551
+ "epoch": 29.6,
552
+ "learning_rate": 9.711849479583668e-05,
553
+ "loss": 0.0217,
554
  "step": 3700
555
  },
556
  {
557
+ "epoch": 29.6,
558
+ "eval_loss": 0.33174052834510803,
559
+ "eval_runtime": 191.0157,
560
+ "eval_samples_per_second": 18.82,
561
+ "eval_steps_per_second": 2.356,
562
+ "eval_wer": 0.2168726957276079,
563
  "step": 3700
564
  },
565
  {
566
+ "epoch": 30.4,
567
+ "learning_rate": 9.703843074459567e-05,
568
+ "loss": 0.0221,
569
  "step": 3800
570
  },
571
  {
572
+ "epoch": 30.4,
573
+ "eval_loss": 0.3100583255290985,
574
+ "eval_runtime": 190.6873,
575
+ "eval_samples_per_second": 18.853,
576
+ "eval_steps_per_second": 2.36,
577
+ "eval_wer": 0.21774018651051832,
578
  "step": 3800
579
  },
580
  {
581
+ "epoch": 31.2,
582
+ "learning_rate": 9.695836669335468e-05,
583
+ "loss": 0.021,
584
  "step": 3900
585
  },
586
  {
587
+ "epoch": 31.2,
588
+ "eval_loss": 0.33132240176200867,
589
+ "eval_runtime": 187.5625,
590
+ "eval_samples_per_second": 19.167,
591
+ "eval_steps_per_second": 2.399,
592
+ "eval_wer": 0.21741487746692692,
593
  "step": 3900
594
  },
595
  {
596
+ "epoch": 31.99,
597
+ "learning_rate": 9.68783026421137e-05,
598
+ "loss": 0.0212,
599
  "step": 4000
600
  },
601
  {
602
+ "epoch": 31.99,
603
+ "eval_loss": 0.32063281536102295,
604
+ "eval_runtime": 192.7124,
605
+ "eval_samples_per_second": 18.655,
606
+ "eval_steps_per_second": 2.335,
607
+ "eval_wer": 0.21340273259596618,
608
  "step": 4000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
609
  }
610
  ],
611
+ "max_steps": 125000,
612
  "num_train_epochs": 1000,
613
+ "total_flos": 9.112861160368682e+19,
614
  "trial_name": null,
615
  "trial_params": null
616
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f52fa40ebbde5a5ec6af8d2b2cceb3adbc9f404cf3afe5cd25bb4606ecbb4bf0
3
  size 3323
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4775b7d54e5ed7fc71d018fd7cb01140ef925d4333446c094a86299d55c0701
3
  size 3323