DewiBrynJones commited on
Commit
b43332d
·
verified ·
1 Parent(s): 6cb46b7

End of training

Browse files
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: apache-2.0
3
  base_model: DewiBrynJones/wav2vec2-xlsr-53-ft-btb-cv-cy
4
  tags:
 
 
5
  - generated_from_trainer
6
  metrics:
7
  - wer
 
2
  license: apache-2.0
3
  base_model: DewiBrynJones/wav2vec2-xlsr-53-ft-btb-cv-cy
4
  tags:
5
+ - automatic-speech-recognition
6
+ - ./data-configs/btb.json
7
  - generated_from_trainer
8
  metrics:
9
  - wer
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 1.4240956992309883,
3
- "eval_loss": 0.43451622128486633,
4
- "eval_runtime": 181.7486,
5
- "eval_samples": 7022,
6
- "eval_samples_per_second": 38.636,
7
- "eval_steps_per_second": 0.605,
8
- "eval_wer": 0.3308175766353526,
9
- "total_flos": 4.5974516642218747e+18,
10
- "train_loss": 0.7817989181518554,
11
- "train_runtime": 11412.7197,
12
- "train_samples": 28086,
13
- "train_samples_per_second": 3.505,
14
- "train_steps_per_second": 0.876
15
  }
 
1
  {
2
+ "epoch": 1.0726161106939827,
3
+ "eval_loss": Infinity,
4
+ "eval_runtime": 104.9656,
5
+ "eval_samples": 3901,
6
+ "eval_samples_per_second": 37.165,
7
+ "eval_steps_per_second": 0.581,
8
+ "eval_wer": 0.3401922426701444,
9
+ "total_flos": 4.496412338111517e+18,
10
+ "train_loss": 0.8045109680175782,
11
+ "train_runtime": 7510.9356,
12
+ "train_samples": 37291,
13
+ "train_samples_per_second": 5.326,
14
+ "train_steps_per_second": 1.331
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.4240956992309883,
3
- "eval_loss": 0.43451622128486633,
4
- "eval_runtime": 181.7486,
5
- "eval_samples": 7022,
6
- "eval_samples_per_second": 38.636,
7
- "eval_steps_per_second": 0.605,
8
- "eval_wer": 0.3308175766353526
9
  }
 
1
  {
2
+ "epoch": 1.0726161106939827,
3
+ "eval_loss": Infinity,
4
+ "eval_runtime": 104.9656,
5
+ "eval_samples": 3901,
6
+ "eval_samples_per_second": 37.165,
7
+ "eval_steps_per_second": 0.581,
8
+ "eval_wer": 0.3401922426701444
9
  }
runs/Oct03_11-33-38_8523e2deba23/events.out.tfevents.1727960055.8523e2deba23.123.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6218c0956f391bb4b373bc0ebc63efad935ea81d03f6a3bf1d97812d9fc1959e
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.4240956992309883,
3
- "total_flos": 4.5974516642218747e+18,
4
- "train_loss": 0.7817989181518554,
5
- "train_runtime": 11412.7197,
6
- "train_samples": 28086,
7
- "train_samples_per_second": 3.505,
8
- "train_steps_per_second": 0.876
9
  }
 
1
  {
2
+ "epoch": 1.0726161106939827,
3
+ "total_flos": 4.496412338111517e+18,
4
+ "train_loss": 0.8045109680175782,
5
+ "train_runtime": 7510.9356,
6
+ "train_samples": 37291,
7
+ "train_samples_per_second": 5.326,
8
+ "train_steps_per_second": 1.331
9
  }
trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.4240956992309883,
5
  "eval_steps": 200,
6
  "global_step": 10000,
7
  "is_hyper_param_search": false,
@@ -9,603 +9,603 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.028481913984619765,
13
- "eval_loss": 1.2521600723266602,
14
- "eval_runtime": 184.7297,
15
- "eval_samples_per_second": 38.012,
16
- "eval_steps_per_second": 0.595,
17
- "eval_wer": 0.6291606319509959,
18
  "step": 200
19
  },
20
  {
21
- "epoch": 0.05696382796923953,
22
- "eval_loss": 0.6599467396736145,
23
- "eval_runtime": 185.0187,
24
- "eval_samples_per_second": 37.953,
25
- "eval_steps_per_second": 0.595,
26
- "eval_wer": 0.45444398676570247,
27
  "step": 400
28
  },
29
  {
30
- "epoch": 0.07120478496154942,
31
- "grad_norm": 18.865299224853516,
32
- "learning_rate": 0.00014879999999999998,
33
- "loss": 2.2791,
34
  "step": 500
35
  },
36
  {
37
- "epoch": 0.0854457419538593,
38
- "eval_loss": 0.6628636717796326,
39
- "eval_runtime": 185.7673,
40
- "eval_samples_per_second": 37.8,
41
- "eval_steps_per_second": 0.592,
42
- "eval_wer": 0.4394557461566059,
43
  "step": 600
44
  },
45
  {
46
- "epoch": 0.11392765593847906,
47
- "eval_loss": 0.7910040020942688,
48
- "eval_runtime": 186.4058,
49
- "eval_samples_per_second": 37.671,
50
- "eval_steps_per_second": 0.59,
51
- "eval_wer": 0.5453035517346763,
52
  "step": 800
53
  },
54
  {
55
- "epoch": 0.14240956992309883,
56
- "grad_norm": 3.8627092838287354,
57
  "learning_rate": 0.0002988,
58
- "loss": 0.8206,
59
  "step": 1000
60
  },
61
  {
62
- "epoch": 0.14240956992309883,
63
- "eval_loss": 0.7757941484451294,
64
- "eval_runtime": 186.8087,
65
- "eval_samples_per_second": 37.589,
66
- "eval_steps_per_second": 0.589,
67
- "eval_wer": 0.5701245033816553,
68
  "step": 1000
69
  },
70
  {
71
- "epoch": 0.1708914839077186,
72
- "eval_loss": 0.802534818649292,
73
- "eval_runtime": 187.4308,
74
- "eval_samples_per_second": 37.464,
75
- "eval_steps_per_second": 0.587,
76
- "eval_wer": 0.5782564211589312,
77
  "step": 1200
78
  },
79
  {
80
- "epoch": 0.19937339789233838,
81
- "eval_loss": 0.7715001106262207,
82
- "eval_runtime": 187.8412,
83
- "eval_samples_per_second": 37.383,
84
- "eval_steps_per_second": 0.586,
85
- "eval_wer": 0.5211336850077731,
86
  "step": 1400
87
  },
88
  {
89
- "epoch": 0.21361435488464825,
90
- "grad_norm": 11.042049407958984,
91
  "learning_rate": 0.00028346666666666665,
92
- "loss": 0.9068,
93
  "step": 1500
94
  },
95
  {
96
- "epoch": 0.22785531187695812,
97
- "eval_loss": 0.7349154949188232,
98
- "eval_runtime": 191.6788,
99
- "eval_samples_per_second": 36.634,
100
- "eval_steps_per_second": 0.574,
101
- "eval_wer": 0.512775880625573,
102
  "step": 1600
103
  },
104
  {
105
- "epoch": 0.2563372258615779,
106
- "eval_loss": 0.7257962226867676,
107
- "eval_runtime": 189.501,
108
- "eval_samples_per_second": 37.055,
109
- "eval_steps_per_second": 0.58,
110
- "eval_wer": 0.5152473458323922,
111
  "step": 1800
112
  },
113
  {
114
- "epoch": 0.28481913984619767,
115
- "grad_norm": 6.190296649932861,
116
  "learning_rate": 0.0002668,
117
- "loss": 0.8679,
118
  "step": 2000
119
  },
120
  {
121
- "epoch": 0.28481913984619767,
122
- "eval_loss": 0.7084089517593384,
123
- "eval_runtime": 188.267,
124
- "eval_samples_per_second": 37.298,
125
- "eval_steps_per_second": 0.584,
126
- "eval_wer": 0.5216386080070158,
127
  "step": 2000
128
  },
129
  {
130
- "epoch": 0.3133010538308174,
131
- "eval_loss": 0.6904259324073792,
132
- "eval_runtime": 188.556,
133
- "eval_samples_per_second": 37.241,
134
- "eval_steps_per_second": 0.583,
135
- "eval_wer": 0.5014151131426142,
136
  "step": 2200
137
  },
138
  {
139
- "epoch": 0.3417829678154372,
140
- "eval_loss": 0.6992842555046082,
141
- "eval_runtime": 189.0868,
142
- "eval_samples_per_second": 37.136,
143
- "eval_steps_per_second": 0.582,
144
- "eval_wer": 0.5177586733812567,
145
  "step": 2400
146
  },
147
  {
148
- "epoch": 0.3560239248077471,
149
- "grad_norm": 4.8257222175598145,
150
  "learning_rate": 0.0002501333333333333,
151
- "loss": 0.8577,
152
  "step": 2500
153
  },
154
  {
155
- "epoch": 0.37026488180005696,
156
- "eval_loss": 0.6746060848236084,
157
- "eval_runtime": 190.1492,
158
- "eval_samples_per_second": 36.929,
159
- "eval_steps_per_second": 0.578,
160
- "eval_wer": 0.48673248382253287,
161
  "step": 2600
162
  },
163
  {
164
- "epoch": 0.39874679578467676,
165
- "eval_loss": 0.6621994972229004,
166
- "eval_runtime": 189.6459,
167
- "eval_samples_per_second": 37.027,
168
- "eval_steps_per_second": 0.58,
169
- "eval_wer": 0.4962595835714001,
170
  "step": 2800
171
  },
172
  {
173
- "epoch": 0.4272287097692965,
174
- "grad_norm": 3.6695899963378906,
175
  "learning_rate": 0.00023346666666666666,
176
- "loss": 0.7995,
177
  "step": 3000
178
  },
179
  {
180
- "epoch": 0.4272287097692965,
181
- "eval_loss": 0.6793097853660583,
182
- "eval_runtime": 188.7722,
183
- "eval_samples_per_second": 37.198,
184
- "eval_steps_per_second": 0.583,
185
- "eval_wer": 0.49348250707556574,
186
  "step": 3000
187
  },
188
  {
189
- "epoch": 0.45571062375391624,
190
- "eval_loss": 0.6368467211723328,
191
- "eval_runtime": 188.0679,
192
- "eval_samples_per_second": 37.338,
193
- "eval_steps_per_second": 0.585,
194
- "eval_wer": 0.47005673740017806,
195
  "step": 3200
196
  },
197
  {
198
- "epoch": 0.48419253773853604,
199
- "eval_loss": 0.6363435387611389,
200
- "eval_runtime": 188.2666,
201
- "eval_samples_per_second": 37.298,
202
- "eval_steps_per_second": 0.584,
203
- "eval_wer": 0.478055780703969,
204
  "step": 3400
205
  },
206
  {
207
- "epoch": 0.4984334947308459,
208
- "grad_norm": 3.4502739906311035,
209
  "learning_rate": 0.0002168333333333333,
210
- "loss": 0.8141,
211
  "step": 3500
212
  },
213
  {
214
- "epoch": 0.5126744517231558,
215
- "eval_loss": 0.6217373609542847,
216
- "eval_runtime": 187.6755,
217
- "eval_samples_per_second": 37.416,
218
- "eval_steps_per_second": 0.586,
219
- "eval_wer": 0.46555229274904,
220
  "step": 3600
221
  },
222
  {
223
- "epoch": 0.5411563657077756,
224
- "eval_loss": 0.641762912273407,
225
- "eval_runtime": 186.9231,
226
- "eval_samples_per_second": 37.566,
227
- "eval_steps_per_second": 0.588,
228
- "eval_wer": 0.4940140049695053,
229
  "step": 3800
230
  },
231
  {
232
- "epoch": 0.5696382796923953,
233
- "grad_norm": 5.877405643463135,
234
- "learning_rate": 0.00020016666666666666,
235
- "loss": 0.7953,
236
  "step": 4000
237
  },
238
  {
239
- "epoch": 0.5696382796923953,
240
- "eval_loss": 0.6017736196517944,
241
- "eval_runtime": 182.787,
242
- "eval_samples_per_second": 38.416,
243
- "eval_steps_per_second": 0.602,
244
- "eval_wer": 0.4542313876081266,
245
  "step": 4000
246
  },
247
  {
248
- "epoch": 0.5981201936770151,
249
- "eval_loss": 0.5962206721305847,
250
- "eval_runtime": 183.0007,
251
- "eval_samples_per_second": 38.371,
252
- "eval_steps_per_second": 0.601,
253
- "eval_wer": 0.4580315975497947,
254
  "step": 4200
255
  },
256
  {
257
- "epoch": 0.6266021076616348,
258
- "eval_loss": 0.5883399844169617,
259
- "eval_runtime": 182.7298,
260
- "eval_samples_per_second": 38.428,
261
- "eval_steps_per_second": 0.602,
262
- "eval_wer": 0.44590015812062345,
263
  "step": 4400
264
  },
265
  {
266
- "epoch": 0.6408430646539447,
267
- "grad_norm": 3.615546226501465,
268
  "learning_rate": 0.0001835333333333333,
269
- "loss": 0.7596,
270
  "step": 4500
271
  },
272
  {
273
- "epoch": 0.6550840216462547,
274
- "eval_loss": 0.578825056552887,
275
- "eval_runtime": 183.3674,
276
- "eval_samples_per_second": 38.295,
277
- "eval_steps_per_second": 0.6,
278
- "eval_wer": 0.43253298608804264,
279
  "step": 4600
280
  },
281
  {
282
- "epoch": 0.6835659356308744,
283
- "eval_loss": 0.5708740949630737,
284
- "eval_runtime": 182.6951,
285
- "eval_samples_per_second": 38.436,
286
- "eval_steps_per_second": 0.602,
287
- "eval_wer": 0.4412362641013035,
288
  "step": 4800
289
  },
290
  {
291
- "epoch": 0.7120478496154942,
292
- "grad_norm": 4.345168590545654,
293
  "learning_rate": 0.0001669,
294
- "loss": 0.7533,
295
  "step": 5000
296
  },
297
  {
298
- "epoch": 0.7120478496154942,
299
- "eval_loss": 0.5594890117645264,
300
- "eval_runtime": 182.5857,
301
- "eval_samples_per_second": 38.459,
302
- "eval_steps_per_second": 0.602,
303
- "eval_wer": 0.4352170504524376,
304
  "step": 5000
305
  },
306
  {
307
- "epoch": 0.7405297636001139,
308
- "eval_loss": 0.5545539259910583,
309
- "eval_runtime": 182.2233,
310
- "eval_samples_per_second": 38.535,
311
- "eval_steps_per_second": 0.604,
312
- "eval_wer": 0.4231786231547057,
313
  "step": 5200
314
  },
315
  {
316
- "epoch": 0.7690116775847337,
317
- "eval_loss": 0.5545418858528137,
318
- "eval_runtime": 182.2691,
319
- "eval_samples_per_second": 38.525,
320
- "eval_steps_per_second": 0.604,
321
- "eval_wer": 0.4244276432054638,
322
  "step": 5400
323
  },
324
  {
325
- "epoch": 0.7832526345770435,
326
- "grad_norm": 9.471431732177734,
327
- "learning_rate": 0.00015026666666666667,
328
- "loss": 0.7591,
329
  "step": 5500
330
  },
331
  {
332
- "epoch": 0.7974935915693535,
333
- "eval_loss": 0.5442594885826111,
334
- "eval_runtime": 182.3947,
335
- "eval_samples_per_second": 38.499,
336
- "eval_steps_per_second": 0.603,
337
- "eval_wer": 0.4076455972043211,
338
  "step": 5600
339
  },
340
  {
341
- "epoch": 0.8259755055539733,
342
- "eval_loss": 0.5341240763664246,
343
- "eval_runtime": 182.0603,
344
- "eval_samples_per_second": 38.57,
345
- "eval_steps_per_second": 0.604,
346
- "eval_wer": 0.41462150706227824,
347
  "step": 5800
348
  },
349
  {
350
- "epoch": 0.854457419538593,
351
- "grad_norm": 4.406210422515869,
352
- "learning_rate": 0.00013363333333333332,
353
- "loss": 0.6621,
354
  "step": 6000
355
  },
356
  {
357
- "epoch": 0.854457419538593,
358
- "eval_loss": 0.5104002952575684,
359
- "eval_runtime": 181.8706,
360
- "eval_samples_per_second": 38.61,
361
- "eval_steps_per_second": 0.605,
362
- "eval_wer": 0.3955141577751498,
363
  "step": 6000
364
  },
365
  {
366
- "epoch": 0.8829393335232127,
367
- "eval_loss": 0.5139421820640564,
368
- "eval_runtime": 181.902,
369
- "eval_samples_per_second": 38.603,
370
- "eval_steps_per_second": 0.605,
371
- "eval_wer": 0.40112146055621256,
372
  "step": 6200
373
  },
374
  {
375
- "epoch": 0.9114212475078325,
376
- "eval_loss": 0.5044221878051758,
377
- "eval_runtime": 181.9538,
378
- "eval_samples_per_second": 38.592,
379
- "eval_steps_per_second": 0.605,
380
- "eval_wer": 0.38039304269256835,
381
  "step": 6400
382
  },
383
  {
384
- "epoch": 0.9256622045001424,
385
- "grad_norm": 8.09687328338623,
386
- "learning_rate": 0.000117,
387
- "loss": 0.6705,
388
  "step": 6500
389
  },
390
  {
391
- "epoch": 0.9399031614924523,
392
- "eval_loss": 0.49985769391059875,
393
- "eval_runtime": 182.1414,
394
- "eval_samples_per_second": 38.552,
395
- "eval_steps_per_second": 0.604,
396
- "eval_wer": 0.3896012437050718,
397
  "step": 6600
398
  },
399
  {
400
- "epoch": 0.9683850754770721,
401
- "eval_loss": 0.5097447037696838,
402
- "eval_runtime": 181.5418,
403
- "eval_samples_per_second": 38.68,
404
- "eval_steps_per_second": 0.606,
405
- "eval_wer": 0.4052804315762899,
406
  "step": 6800
407
  },
408
  {
409
- "epoch": 0.9968669894616918,
410
- "grad_norm": 4.639442443847656,
411
- "learning_rate": 0.00010033333333333332,
412
- "loss": 0.6665,
413
  "step": 7000
414
  },
415
  {
416
- "epoch": 0.9968669894616918,
417
- "eval_loss": 0.49253013730049133,
418
- "eval_runtime": 181.6405,
419
- "eval_samples_per_second": 38.659,
420
- "eval_steps_per_second": 0.606,
421
- "eval_wer": 0.3784796502743858,
422
  "step": 7000
423
  },
424
  {
425
- "epoch": 1.0253489034463117,
426
- "eval_loss": 0.4896470010280609,
427
- "eval_runtime": 181.3934,
428
- "eval_samples_per_second": 38.711,
429
- "eval_steps_per_second": 0.606,
430
- "eval_wer": 0.3688728258414276,
431
  "step": 7200
432
  },
433
  {
434
- "epoch": 1.0538308174309314,
435
- "eval_loss": 0.47494611144065857,
436
- "eval_runtime": 181.7386,
437
- "eval_samples_per_second": 38.638,
438
- "eval_steps_per_second": 0.605,
439
- "eval_wer": 0.3687399513679427,
440
  "step": 7400
441
  },
442
  {
443
- "epoch": 1.0680717744232413,
444
- "grad_norm": 0.6623511910438538,
445
- "learning_rate": 8.366666666666666e-05,
446
- "loss": 0.5826,
447
  "step": 7500
448
  },
449
  {
450
- "epoch": 1.0823127314155512,
451
- "eval_loss": 0.4684299826622009,
452
- "eval_runtime": 182.4026,
453
- "eval_samples_per_second": 38.497,
454
- "eval_steps_per_second": 0.603,
455
- "eval_wer": 0.3628004624031677,
456
  "step": 7600
457
  },
458
  {
459
- "epoch": 1.110794645400171,
460
- "eval_loss": 0.47290024161338806,
461
- "eval_runtime": 182.1043,
462
- "eval_samples_per_second": 38.56,
463
- "eval_steps_per_second": 0.604,
464
- "eval_wer": 0.358495329462257,
465
  "step": 7800
466
  },
467
  {
468
- "epoch": 1.1392765593847907,
469
- "grad_norm": 2.393817186355591,
470
- "learning_rate": 6.699999999999999e-05,
471
- "loss": 0.5836,
472
  "step": 8000
473
  },
474
  {
475
- "epoch": 1.1392765593847907,
476
- "eval_loss": 0.46409761905670166,
477
- "eval_runtime": 181.7327,
478
- "eval_samples_per_second": 38.639,
479
- "eval_steps_per_second": 0.605,
480
- "eval_wer": 0.3553196295459679,
481
  "step": 8000
482
  },
483
  {
484
- "epoch": 1.1677584733694104,
485
- "eval_loss": 0.45749881863594055,
486
- "eval_runtime": 181.5866,
487
- "eval_samples_per_second": 38.67,
488
- "eval_steps_per_second": 0.606,
489
- "eval_wer": 0.3529810388126337,
490
  "step": 8200
491
  },
492
  {
493
- "epoch": 1.1962403873540302,
494
- "eval_loss": 0.45851147174835205,
495
- "eval_runtime": 181.5801,
496
- "eval_samples_per_second": 38.672,
497
- "eval_steps_per_second": 0.606,
498
- "eval_wer": 0.3485563188455866,
499
  "step": 8400
500
  },
501
  {
502
- "epoch": 1.21048134434634,
503
- "grad_norm": 1.9676859378814697,
504
- "learning_rate": 5.033333333333333e-05,
505
- "loss": 0.5199,
506
  "step": 8500
507
  },
508
  {
509
- "epoch": 1.22472230133865,
510
- "eval_loss": 0.4548875391483307,
511
- "eval_runtime": 182.6274,
512
- "eval_samples_per_second": 38.45,
513
- "eval_steps_per_second": 0.602,
514
- "eval_wer": 0.3450750076402822,
515
  "step": 8600
516
  },
517
  {
518
- "epoch": 1.2532042153232696,
519
- "eval_loss": 0.4520675539970398,
520
- "eval_runtime": 182.8881,
521
- "eval_samples_per_second": 38.395,
522
- "eval_steps_per_second": 0.601,
523
- "eval_wer": 0.34082302448876545,
524
  "step": 8800
525
  },
526
  {
527
- "epoch": 1.2816861293078894,
528
- "grad_norm": 1.1400251388549805,
529
- "learning_rate": 3.373333333333333e-05,
530
- "loss": 0.5268,
531
  "step": 9000
532
  },
533
  {
534
- "epoch": 1.2816861293078894,
535
- "eval_loss": 0.44252264499664307,
536
- "eval_runtime": 182.3349,
537
- "eval_samples_per_second": 38.512,
538
- "eval_steps_per_second": 0.603,
539
- "eval_wer": 0.33950756720126496,
540
  "step": 9000
541
  },
542
  {
543
- "epoch": 1.3101680432925091,
544
- "eval_loss": 0.44072064757347107,
545
- "eval_runtime": 184.1579,
546
- "eval_samples_per_second": 38.13,
547
- "eval_steps_per_second": 0.597,
548
- "eval_wer": 0.3361857053641425,
549
  "step": 9200
550
  },
551
  {
552
- "epoch": 1.338649957277129,
553
- "eval_loss": 0.4383063018321991,
554
- "eval_runtime": 181.6966,
555
- "eval_samples_per_second": 38.647,
556
- "eval_steps_per_second": 0.605,
557
- "eval_wer": 0.33397998910429316,
558
  "step": 9400
559
  },
560
  {
561
- "epoch": 1.352890914269439,
562
- "grad_norm": 1.0755033493041992,
563
- "learning_rate": 1.71e-05,
564
- "loss": 0.5013,
565
  "step": 9500
566
  },
567
  {
568
- "epoch": 1.3671318712617488,
569
- "eval_loss": 0.4356846809387207,
570
- "eval_runtime": 183.1225,
571
- "eval_samples_per_second": 38.346,
572
- "eval_steps_per_second": 0.601,
573
- "eval_wer": 0.33253165734330775,
574
  "step": 9600
575
  },
576
  {
577
- "epoch": 1.3956137852463686,
578
- "eval_loss": 0.43495818972587585,
579
- "eval_runtime": 182.2639,
580
- "eval_samples_per_second": 38.527,
581
- "eval_steps_per_second": 0.604,
582
- "eval_wer": 0.3316812607130044,
583
  "step": 9800
584
  },
585
  {
586
- "epoch": 1.4240956992309883,
587
- "grad_norm": 1.6312005519866943,
588
- "learning_rate": 4.666666666666666e-07,
589
- "loss": 0.5095,
590
  "step": 10000
591
  },
592
  {
593
- "epoch": 1.4240956992309883,
594
- "eval_loss": 0.43451622128486633,
595
- "eval_runtime": 182.2078,
596
- "eval_samples_per_second": 38.538,
597
- "eval_steps_per_second": 0.604,
598
- "eval_wer": 0.3308175766353526,
599
  "step": 10000
600
  },
601
  {
602
- "epoch": 1.4240956992309883,
603
  "step": 10000,
604
- "total_flos": 4.5974516642218747e+18,
605
- "train_loss": 0.7817989181518554,
606
- "train_runtime": 11412.7197,
607
- "train_samples_per_second": 3.505,
608
- "train_steps_per_second": 0.876
609
  }
610
  ],
611
  "logging_steps": 500,
@@ -625,7 +625,7 @@
625
  "attributes": {}
626
  }
627
  },
628
- "total_flos": 4.5974516642218747e+18,
629
  "train_batch_size": 4,
630
  "trial_name": null,
631
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0726161106939827,
5
  "eval_steps": 200,
6
  "global_step": 10000,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.021452322213879653,
13
+ "eval_loss": Infinity,
14
+ "eval_runtime": 109.0854,
15
+ "eval_samples_per_second": 35.761,
16
+ "eval_steps_per_second": 0.559,
17
+ "eval_wer": 0.5591701685746027,
18
  "step": 200
19
  },
20
  {
21
+ "epoch": 0.042904644427759306,
22
+ "eval_loss": Infinity,
23
+ "eval_runtime": 105.5961,
24
+ "eval_samples_per_second": 36.943,
25
+ "eval_steps_per_second": 0.578,
26
+ "eval_wer": 0.4289474955320485,
27
  "step": 400
28
  },
29
  {
30
+ "epoch": 0.05363080553469913,
31
+ "grad_norm": 2.6033225059509277,
32
+ "learning_rate": 0.0001494,
33
+ "loss": 2.1964,
34
  "step": 500
35
  },
36
  {
37
+ "epoch": 0.06435696664163895,
38
+ "eval_loss": Infinity,
39
+ "eval_runtime": 104.8581,
40
+ "eval_samples_per_second": 37.203,
41
+ "eval_steps_per_second": 0.582,
42
+ "eval_wer": 0.43744867893542,
43
  "step": 600
44
  },
45
  {
46
+ "epoch": 0.08580928885551861,
47
+ "eval_loss": Infinity,
48
+ "eval_runtime": 104.5471,
49
+ "eval_samples_per_second": 37.313,
50
+ "eval_steps_per_second": 0.583,
51
+ "eval_wer": 0.4944452494807516,
52
  "step": 800
53
  },
54
  {
55
+ "epoch": 0.10726161106939826,
56
+ "grad_norm": 4.917770862579346,
57
  "learning_rate": 0.0002988,
58
+ "loss": 0.8327,
59
  "step": 1000
60
  },
61
  {
62
+ "epoch": 0.10726161106939826,
63
+ "eval_loss": Infinity,
64
+ "eval_runtime": 104.2475,
65
+ "eval_samples_per_second": 37.421,
66
+ "eval_steps_per_second": 0.585,
67
+ "eval_wer": 0.5149736753127566,
68
  "step": 1000
69
  },
70
  {
71
+ "epoch": 0.1287139332832779,
72
+ "eval_loss": Infinity,
73
+ "eval_runtime": 104.0588,
74
+ "eval_samples_per_second": 37.488,
75
+ "eval_steps_per_second": 0.586,
76
+ "eval_wer": 0.5633966091870743,
77
  "step": 1200
78
  },
79
  {
80
+ "epoch": 0.15016625549715756,
81
+ "eval_loss": Infinity,
82
+ "eval_runtime": 104.6247,
83
+ "eval_samples_per_second": 37.286,
84
+ "eval_steps_per_second": 0.583,
85
+ "eval_wer": 0.5355021011447616,
86
  "step": 1400
87
  },
88
  {
89
+ "epoch": 0.1608924166040974,
90
+ "grad_norm": 2.821734666824341,
91
  "learning_rate": 0.00028346666666666665,
92
+ "loss": 0.91,
93
  "step": 1500
94
  },
95
  {
96
+ "epoch": 0.17161857771103722,
97
+ "eval_loss": Infinity,
98
+ "eval_runtime": 104.8778,
99
+ "eval_samples_per_second": 37.196,
100
+ "eval_steps_per_second": 0.582,
101
+ "eval_wer": 0.515239337294112,
102
  "step": 1600
103
  },
104
  {
105
+ "epoch": 0.19307089992491688,
106
+ "eval_loss": Infinity,
107
+ "eval_runtime": 105.4509,
108
+ "eval_samples_per_second": 36.994,
109
+ "eval_steps_per_second": 0.578,
110
+ "eval_wer": 0.5594599816451722,
111
  "step": 1800
112
  },
113
  {
114
+ "epoch": 0.21452322213879652,
115
+ "grad_norm": 9.015162467956543,
116
  "learning_rate": 0.0002668,
117
+ "loss": 0.8721,
118
  "step": 2000
119
  },
120
  {
121
+ "epoch": 0.21452322213879652,
122
+ "eval_loss": Infinity,
123
+ "eval_runtime": 105.3779,
124
+ "eval_samples_per_second": 37.019,
125
+ "eval_steps_per_second": 0.579,
126
+ "eval_wer": 0.5056513548761049,
127
  "step": 2000
128
  },
129
  {
130
+ "epoch": 0.23597554435267618,
131
+ "eval_loss": Infinity,
132
+ "eval_runtime": 105.6302,
133
+ "eval_samples_per_second": 36.931,
134
+ "eval_steps_per_second": 0.577,
135
+ "eval_wer": 0.5041298362556151,
136
  "step": 2200
137
  },
138
  {
139
+ "epoch": 0.2574278665665558,
140
+ "eval_loss": Infinity,
141
+ "eval_runtime": 105.5892,
142
+ "eval_samples_per_second": 36.945,
143
+ "eval_steps_per_second": 0.578,
144
+ "eval_wer": 0.5145631067961165,
145
  "step": 2400
146
  },
147
  {
148
+ "epoch": 0.26815402767349567,
149
+ "grad_norm": 5.016167163848877,
150
  "learning_rate": 0.0002501333333333333,
151
+ "loss": 0.8218,
152
  "step": 2500
153
  },
154
  {
155
+ "epoch": 0.27888018878043547,
156
+ "eval_loss": Infinity,
157
+ "eval_runtime": 106.4951,
158
+ "eval_samples_per_second": 36.631,
159
+ "eval_steps_per_second": 0.573,
160
+ "eval_wer": 0.5018113316910593,
161
  "step": 2600
162
  },
163
  {
164
+ "epoch": 0.3003325109943151,
165
+ "eval_loss": Infinity,
166
+ "eval_runtime": 106.2902,
167
+ "eval_samples_per_second": 36.701,
168
+ "eval_steps_per_second": 0.574,
169
+ "eval_wer": 0.5090566584552964,
170
  "step": 2800
171
  },
172
  {
173
+ "epoch": 0.3217848332081948,
174
+ "grad_norm": 2.5943267345428467,
175
  "learning_rate": 0.00023346666666666666,
176
+ "loss": 0.8469,
177
  "step": 3000
178
  },
179
  {
180
+ "epoch": 0.3217848332081948,
181
+ "eval_loss": Infinity,
182
+ "eval_runtime": 105.8685,
183
+ "eval_samples_per_second": 36.848,
184
+ "eval_steps_per_second": 0.576,
185
+ "eval_wer": 0.5036709655605468,
186
  "step": 3000
187
  },
188
  {
189
+ "epoch": 0.34323715542207445,
190
+ "eval_loss": Infinity,
191
+ "eval_runtime": 105.9311,
192
+ "eval_samples_per_second": 36.826,
193
+ "eval_steps_per_second": 0.576,
194
+ "eval_wer": 0.4703183113558421,
195
  "step": 3200
196
  },
197
  {
198
+ "epoch": 0.3646894776359541,
199
+ "eval_loss": Infinity,
200
+ "eval_runtime": 105.4279,
201
+ "eval_samples_per_second": 37.002,
202
+ "eval_steps_per_second": 0.579,
203
+ "eval_wer": 0.47951987634642323,
204
  "step": 3400
205
  },
206
  {
207
+ "epoch": 0.3754156387428939,
208
+ "grad_norm": 4.555402755737305,
209
  "learning_rate": 0.0002168333333333333,
210
+ "loss": 0.8142,
211
  "step": 3500
212
  },
213
  {
214
+ "epoch": 0.38614179984983377,
215
+ "eval_loss": Infinity,
216
+ "eval_runtime": 105.6085,
217
+ "eval_samples_per_second": 36.938,
218
+ "eval_steps_per_second": 0.578,
219
+ "eval_wer": 0.4714051103704777,
220
  "step": 3600
221
  },
222
  {
223
+ "epoch": 0.40759412206371337,
224
+ "eval_loss": Infinity,
225
+ "eval_runtime": 105.5848,
226
+ "eval_samples_per_second": 36.947,
227
+ "eval_steps_per_second": 0.578,
228
+ "eval_wer": 0.4553929382215138,
229
  "step": 3800
230
  },
231
  {
232
+ "epoch": 0.42904644427759303,
233
+ "grad_norm": 15.551188468933105,
234
+ "learning_rate": 0.0002002,
235
+ "loss": 0.8085,
236
  "step": 4000
237
  },
238
  {
239
+ "epoch": 0.42904644427759303,
240
+ "eval_loss": Infinity,
241
+ "eval_runtime": 105.8874,
242
+ "eval_samples_per_second": 36.841,
243
+ "eval_steps_per_second": 0.576,
244
+ "eval_wer": 0.4505868714679032,
245
  "step": 4000
246
  },
247
  {
248
+ "epoch": 0.4504987664914727,
249
+ "eval_loss": Infinity,
250
+ "eval_runtime": 107.6738,
251
+ "eval_samples_per_second": 36.23,
252
+ "eval_steps_per_second": 0.567,
253
+ "eval_wer": 0.4457566536250785,
254
  "step": 4200
255
  },
256
  {
257
+ "epoch": 0.47195108870535235,
258
+ "eval_loss": Infinity,
259
+ "eval_runtime": 105.5778,
260
+ "eval_samples_per_second": 36.949,
261
+ "eval_steps_per_second": 0.578,
262
+ "eval_wer": 0.43669999516978214,
263
  "step": 4400
264
  },
265
  {
266
+ "epoch": 0.4826772498122922,
267
+ "grad_norm": 4.841684818267822,
268
  "learning_rate": 0.0001835333333333333,
269
+ "loss": 0.7802,
270
  "step": 4500
271
  },
272
  {
273
+ "epoch": 0.493403410919232,
274
+ "eval_loss": Infinity,
275
+ "eval_runtime": 105.9573,
276
+ "eval_samples_per_second": 36.817,
277
+ "eval_steps_per_second": 0.576,
278
+ "eval_wer": 0.4401052987489736,
279
  "step": 4600
280
  },
281
  {
282
+ "epoch": 0.5148557331331116,
283
+ "eval_loss": Infinity,
284
+ "eval_runtime": 105.3523,
285
+ "eval_samples_per_second": 37.028,
286
+ "eval_steps_per_second": 0.579,
287
+ "eval_wer": 0.43336714485823313,
288
  "step": 4800
289
  },
290
  {
291
+ "epoch": 0.5363080553469913,
292
+ "grad_norm": 7.372885227203369,
293
  "learning_rate": 0.0001669,
294
+ "loss": 0.7493,
295
  "step": 5000
296
  },
297
  {
298
+ "epoch": 0.5363080553469913,
299
+ "eval_loss": Infinity,
300
+ "eval_runtime": 107.6356,
301
+ "eval_samples_per_second": 36.243,
302
+ "eval_steps_per_second": 0.567,
303
+ "eval_wer": 0.4224267014442351,
304
  "step": 5000
305
  },
306
  {
307
+ "epoch": 0.5577603775608709,
308
+ "eval_loss": Infinity,
309
+ "eval_runtime": 107.4854,
310
+ "eval_samples_per_second": 36.293,
311
+ "eval_steps_per_second": 0.568,
312
+ "eval_wer": 0.43278751871709414,
313
  "step": 5200
314
  },
315
  {
316
+ "epoch": 0.5792126997747507,
317
+ "eval_loss": Infinity,
318
+ "eval_runtime": 105.275,
319
+ "eval_samples_per_second": 37.055,
320
+ "eval_steps_per_second": 0.579,
321
+ "eval_wer": 0.41764478577983866,
322
  "step": 5400
323
  },
324
  {
325
+ "epoch": 0.5899388608816905,
326
+ "grad_norm": 2.7961230278015137,
327
+ "learning_rate": 0.00015023333333333332,
328
+ "loss": 0.7668,
329
  "step": 5500
330
  },
331
  {
332
+ "epoch": 0.6006650219886303,
333
+ "eval_loss": Infinity,
334
+ "eval_runtime": 105.464,
335
+ "eval_samples_per_second": 36.989,
336
+ "eval_steps_per_second": 0.578,
337
+ "eval_wer": 0.41829686518862,
338
  "step": 5600
339
  },
340
  {
341
+ "epoch": 0.62211734420251,
342
+ "eval_loss": Infinity,
343
+ "eval_runtime": 104.96,
344
+ "eval_samples_per_second": 37.167,
345
+ "eval_steps_per_second": 0.581,
346
+ "eval_wer": 0.40296092353765156,
347
  "step": 5800
348
  },
349
  {
350
+ "epoch": 0.6435696664163896,
351
+ "grad_norm": 6.007960319519043,
352
+ "learning_rate": 0.0001336,
353
+ "loss": 0.6999,
354
  "step": 6000
355
  },
356
  {
357
+ "epoch": 0.6435696664163896,
358
+ "eval_loss": Infinity,
359
+ "eval_runtime": 104.9116,
360
+ "eval_samples_per_second": 37.184,
361
+ "eval_steps_per_second": 0.581,
362
+ "eval_wer": 0.4124523015988021,
363
  "step": 6000
364
  },
365
  {
366
+ "epoch": 0.6650219886302692,
367
+ "eval_loss": Infinity,
368
+ "eval_runtime": 108.5507,
369
+ "eval_samples_per_second": 35.937,
370
+ "eval_steps_per_second": 0.562,
371
+ "eval_wer": 0.40759793266676325,
372
  "step": 6200
373
  },
374
  {
375
+ "epoch": 0.6864743108441489,
376
+ "eval_loss": Infinity,
377
+ "eval_runtime": 104.9858,
378
+ "eval_samples_per_second": 37.157,
379
+ "eval_steps_per_second": 0.581,
380
+ "eval_wer": 0.39170651596386996,
381
  "step": 6400
382
  },
383
  {
384
+ "epoch": 0.6972004719510887,
385
+ "grad_norm": 44.30250549316406,
386
+ "learning_rate": 0.00011693333333333332,
387
+ "loss": 0.6918,
388
  "step": 6500
389
  },
390
  {
391
+ "epoch": 0.7079266330580285,
392
+ "eval_loss": Infinity,
393
+ "eval_runtime": 106.5414,
394
+ "eval_samples_per_second": 36.615,
395
+ "eval_steps_per_second": 0.573,
396
+ "eval_wer": 0.4004250591701686,
397
  "step": 6600
398
  },
399
  {
400
+ "epoch": 0.7293789552719082,
401
+ "eval_loss": Infinity,
402
+ "eval_runtime": 104.9171,
403
+ "eval_samples_per_second": 37.182,
404
+ "eval_steps_per_second": 0.581,
405
+ "eval_wer": 0.38653818287204755,
406
  "step": 6800
407
  },
408
  {
409
+ "epoch": 0.7508312774857878,
410
+ "grad_norm": 3.788344144821167,
411
+ "learning_rate": 0.00010029999999999998,
412
+ "loss": 0.6888,
413
  "step": 7000
414
  },
415
  {
416
+ "epoch": 0.7508312774857878,
417
+ "eval_loss": Infinity,
418
+ "eval_runtime": 105.3057,
419
+ "eval_samples_per_second": 37.045,
420
+ "eval_steps_per_second": 0.579,
421
+ "eval_wer": 0.3785200212529585,
422
  "step": 7000
423
  },
424
  {
425
+ "epoch": 0.7722835996996675,
426
+ "eval_loss": Infinity,
427
+ "eval_runtime": 104.7325,
428
+ "eval_samples_per_second": 37.247,
429
+ "eval_steps_per_second": 0.582,
430
+ "eval_wer": 0.3824083466164324,
431
  "step": 7200
432
  },
433
  {
434
+ "epoch": 0.7937359219135471,
435
+ "eval_loss": Infinity,
436
+ "eval_runtime": 105.0488,
437
+ "eval_samples_per_second": 37.135,
438
+ "eval_steps_per_second": 0.581,
439
+ "eval_wer": 0.37426942955127274,
440
  "step": 7400
441
  },
442
  {
443
+ "epoch": 0.8044620830204869,
444
+ "grad_norm": 5.486635684967041,
445
+ "learning_rate": 8.363333333333332e-05,
446
+ "loss": 0.646,
447
  "step": 7500
448
  },
449
  {
450
+ "epoch": 0.8151882441274267,
451
+ "eval_loss": Infinity,
452
+ "eval_runtime": 106.6481,
453
+ "eval_samples_per_second": 36.578,
454
+ "eval_steps_per_second": 0.572,
455
+ "eval_wer": 0.3673139158576052,
456
  "step": 7600
457
  },
458
  {
459
+ "epoch": 0.8366405663413065,
460
+ "eval_loss": Infinity,
461
+ "eval_runtime": 104.8533,
462
+ "eval_samples_per_second": 37.204,
463
+ "eval_steps_per_second": 0.582,
464
+ "eval_wer": 0.36668598753803794,
465
  "step": 7800
466
  },
467
  {
468
+ "epoch": 0.8580928885551861,
469
+ "grad_norm": 3.9184212684631348,
470
+ "learning_rate": 6.696666666666666e-05,
471
+ "loss": 0.6324,
472
  "step": 8000
473
  },
474
  {
475
+ "epoch": 0.8580928885551861,
476
+ "eval_loss": Infinity,
477
+ "eval_runtime": 105.6572,
478
+ "eval_samples_per_second": 36.921,
479
+ "eval_steps_per_second": 0.577,
480
+ "eval_wer": 0.3661546635753272,
481
  "step": 8000
482
  },
483
  {
484
+ "epoch": 0.8795452107690658,
485
+ "eval_loss": Infinity,
486
+ "eval_runtime": 104.6274,
487
+ "eval_samples_per_second": 37.285,
488
+ "eval_steps_per_second": 0.583,
489
+ "eval_wer": 0.36009274018258225,
490
  "step": 8200
491
  },
492
  {
493
+ "epoch": 0.9009975329829454,
494
+ "eval_loss": Infinity,
495
+ "eval_runtime": 104.9439,
496
+ "eval_samples_per_second": 37.172,
497
+ "eval_steps_per_second": 0.581,
498
+ "eval_wer": 0.35345119064869823,
499
  "step": 8400
500
  },
501
  {
502
+ "epoch": 0.9117236940898852,
503
+ "grad_norm": 3.5586395263671875,
504
+ "learning_rate": 5.0299999999999996e-05,
505
+ "loss": 0.6221,
506
  "step": 8500
507
  },
508
  {
509
+ "epoch": 0.9224498551968251,
510
+ "eval_loss": Infinity,
511
+ "eval_runtime": 105.157,
512
+ "eval_samples_per_second": 37.097,
513
+ "eval_steps_per_second": 0.58,
514
+ "eval_wer": 0.35258175143698983,
515
  "step": 8600
516
  },
517
  {
518
+ "epoch": 0.9439021774107047,
519
+ "eval_loss": Infinity,
520
+ "eval_runtime": 105.1867,
521
+ "eval_samples_per_second": 37.086,
522
+ "eval_steps_per_second": 0.58,
523
+ "eval_wer": 0.34874172825194416,
524
  "step": 8800
525
  },
526
  {
527
+ "epoch": 0.9653544996245844,
528
+ "grad_norm": 3.914166212081909,
529
+ "learning_rate": 3.363333333333333e-05,
530
+ "loss": 0.6215,
531
  "step": 9000
532
  },
533
  {
534
+ "epoch": 0.9653544996245844,
535
+ "eval_loss": Infinity,
536
+ "eval_runtime": 105.9335,
537
+ "eval_samples_per_second": 36.825,
538
+ "eval_steps_per_second": 0.576,
539
+ "eval_wer": 0.34811379993237695,
540
  "step": 9000
541
  },
542
  {
543
+ "epoch": 0.986806821838464,
544
+ "eval_loss": Infinity,
545
+ "eval_runtime": 105.4283,
546
+ "eval_samples_per_second": 37.001,
547
+ "eval_steps_per_second": 0.579,
548
+ "eval_wer": 0.3447084963531855,
549
  "step": 9200
550
  },
551
  {
552
+ "epoch": 1.0082591440523436,
553
+ "eval_loss": Infinity,
554
+ "eval_runtime": 105.2731,
555
+ "eval_samples_per_second": 37.056,
556
+ "eval_steps_per_second": 0.579,
557
+ "eval_wer": 0.34103753079263877,
558
  "step": 9400
559
  },
560
  {
561
+ "epoch": 1.0189853051592834,
562
+ "grad_norm": 3.3699042797088623,
563
+ "learning_rate": 1.6966666666666665e-05,
564
+ "loss": 0.5603,
565
  "step": 9500
566
  },
567
  {
568
+ "epoch": 1.0297114662662232,
569
+ "eval_loss": Infinity,
570
+ "eval_runtime": 105.4958,
571
+ "eval_samples_per_second": 36.978,
572
+ "eval_steps_per_second": 0.578,
573
+ "eval_wer": 0.34053035791914216,
574
  "step": 9600
575
  },
576
  {
577
+ "epoch": 1.051163788480103,
578
+ "eval_loss": Infinity,
579
+ "eval_runtime": 105.0872,
580
+ "eval_samples_per_second": 37.122,
581
+ "eval_steps_per_second": 0.58,
582
+ "eval_wer": 0.34120658841713764,
583
  "step": 9800
584
  },
585
  {
586
+ "epoch": 1.0726161106939827,
587
+ "grad_norm": 6.575745582580566,
588
+ "learning_rate": 3.333333333333333e-07,
589
+ "loss": 0.5284,
590
  "step": 10000
591
  },
592
  {
593
+ "epoch": 1.0726161106939827,
594
+ "eval_loss": Infinity,
595
+ "eval_runtime": 108.9867,
596
+ "eval_samples_per_second": 35.793,
597
+ "eval_steps_per_second": 0.56,
598
+ "eval_wer": 0.3401922426701444,
599
  "step": 10000
600
  },
601
  {
602
+ "epoch": 1.0726161106939827,
603
  "step": 10000,
604
+ "total_flos": 4.496412338111517e+18,
605
+ "train_loss": 0.8045109680175782,
606
+ "train_runtime": 7510.9356,
607
+ "train_samples_per_second": 5.326,
608
+ "train_steps_per_second": 1.331
609
  }
610
  ],
611
  "logging_steps": 500,
 
625
  "attributes": {}
626
  }
627
  },
628
+ "total_flos": 4.496412338111517e+18,
629
  "train_batch_size": 4,
630
  "trial_name": null,
631
  "trial_params": null