ales commited on
Commit
3e69ec6
·
1 Parent(s): d30d1e5

End of training

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 0.33,
3
- "eval_loss": 0.5074095726013184,
4
- "eval_runtime": 16.8249,
5
  "eval_samples": 64,
6
- "eval_samples_per_second": 3.804,
7
  "eval_steps_per_second": 0.119,
8
- "eval_wer": 52.197802197802204,
9
- "train_loss": 0.10702953418095906,
10
- "train_runtime": 833.6007,
11
- "train_samples_per_second": 11.516,
12
- "train_steps_per_second": 0.36
13
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_loss": 0.6388838291168213,
4
+ "eval_runtime": 16.8521,
5
  "eval_samples": 64,
6
+ "eval_samples_per_second": 3.798,
7
  "eval_steps_per_second": 0.119,
8
+ "eval_wer": 60.07326007326007,
9
+ "train_loss": 1.0103698587417602,
10
+ "train_runtime": 873.4716,
11
+ "train_samples_per_second": 3.664,
12
+ "train_steps_per_second": 0.114
13
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 0.33,
3
- "eval_loss": 0.5074095726013184,
4
- "eval_runtime": 16.8249,
5
  "eval_samples": 64,
6
- "eval_samples_per_second": 3.804,
7
  "eval_steps_per_second": 0.119,
8
- "eval_wer": 52.197802197802204
9
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "eval_loss": 0.6388838291168213,
4
+ "eval_runtime": 16.8521,
5
  "eval_samples": 64,
6
+ "eval_samples_per_second": 3.798,
7
  "eval_steps_per_second": 0.119,
8
+ "eval_wer": 60.07326007326007
9
  }
runs/Dec13_12-46-08_d7f040c448a8/events.out.tfevents.1670936574.d7f040c448a8.26499.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dc4f4176fff781850f18b76a70711b2188ca1fa5b1b2b1b0424b1b5a5ad1824
3
+ size 352
train.log CHANGED
@@ -153,3 +153,9 @@
153
  {'loss': 0.6647, 'learning_rate': 3.3333333333333333e-06, 'epoch': 1.0}
154
  {'eval_loss': 0.5789934992790222, 'eval_wer': 61.72161172161172, 'eval_runtime': 18.4962, 'eval_samples_per_second': 3.46, 'eval_steps_per_second': 0.108, 'epoch': 1.0}
155
  {'train_runtime': 873.4716, 'train_samples_per_second': 3.664, 'train_steps_per_second': 0.114, 'train_loss': 1.0103698587417602, 'epoch': 1.0}
 
 
 
 
 
 
 
153
  {'loss': 0.6647, 'learning_rate': 3.3333333333333333e-06, 'epoch': 1.0}
154
  {'eval_loss': 0.5789934992790222, 'eval_wer': 61.72161172161172, 'eval_runtime': 18.4962, 'eval_samples_per_second': 3.46, 'eval_steps_per_second': 0.108, 'epoch': 1.0}
155
  {'train_runtime': 873.4716, 'train_samples_per_second': 3.664, 'train_steps_per_second': 0.114, 'train_loss': 1.0103698587417602, 'epoch': 1.0}
156
+ ***** train metrics *****
157
+ epoch = 1.0
158
+ train_loss = 1.0104
159
+ train_runtime = 0:14:33.47
160
+ train_samples_per_second = 3.664
161
+ train_steps_per_second = 0.114
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 0.33,
3
- "train_loss": 0.10702953418095906,
4
- "train_runtime": 833.6007,
5
- "train_samples_per_second": 11.516,
6
- "train_steps_per_second": 0.36
7
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 1.0103698587417602,
4
+ "train_runtime": 873.4716,
5
+ "train_samples_per_second": 3.664,
6
+ "train_steps_per_second": 0.114
7
  }
trainer_state.json CHANGED
@@ -1,484 +1,175 @@
1
  {
2
- "best_metric": 52.197802197802204,
3
- "best_model_checkpoint": "./checkpoint-280",
4
- "epoch": 0.3333333333333333,
5
- "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.5,
12
- "learning_rate": 8.666666666666667e-05,
13
- "loss": 2.4473,
14
  "step": 10
15
  },
16
  {
17
- "epoch": 0.5,
18
- "eval_loss": 1.3674653768539429,
19
- "eval_runtime": 44.1027,
20
- "eval_samples_per_second": 1.451,
21
- "eval_steps_per_second": 0.045,
22
- "eval_wer": 95.42124542124543,
23
  "step": 10
24
  },
25
  {
26
- "epoch": 1.0,
27
- "learning_rate": 2e-05,
28
- "loss": 1.256,
29
  "step": 20
30
  },
31
  {
32
- "epoch": 1.0,
33
- "eval_loss": 0.974454939365387,
34
- "eval_runtime": 19.7011,
35
- "eval_samples_per_second": 3.249,
36
- "eval_steps_per_second": 0.102,
37
  "eval_wer": 75.27472527472527,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.3,
42
- "learning_rate": 7.68421052631579e-05,
43
- "loss": 0.9934,
44
  "step": 30
45
  },
46
  {
47
  "epoch": 0.3,
48
- "eval_loss": 0.811407208442688,
49
- "eval_runtime": 17.9554,
50
- "eval_samples_per_second": 3.564,
51
- "eval_steps_per_second": 0.111,
52
- "eval_wer": 72.16117216117216,
53
  "step": 30
54
  },
55
  {
56
  "epoch": 0.4,
57
- "learning_rate": 6.631578947368421e-05,
58
- "loss": 0.9568,
59
  "step": 40
60
  },
61
  {
62
  "epoch": 0.4,
63
- "eval_loss": 0.7814445495605469,
64
- "eval_runtime": 44.1521,
65
- "eval_samples_per_second": 1.45,
66
- "eval_steps_per_second": 0.045,
67
- "eval_wer": 72.7106227106227,
68
  "step": 40
69
  },
70
  {
71
  "epoch": 0.5,
72
- "learning_rate": 5.5789473684210526e-05,
73
- "loss": 0.6856,
74
  "step": 50
75
  },
76
  {
77
  "epoch": 0.5,
78
- "eval_loss": 0.7517387866973877,
79
- "eval_runtime": 19.7741,
80
- "eval_samples_per_second": 3.237,
81
- "eval_steps_per_second": 0.101,
82
- "eval_wer": 76.92307692307693,
83
  "step": 50
84
  },
85
  {
86
  "epoch": 0.6,
87
- "learning_rate": 4.5263157894736846e-05,
88
- "loss": 0.7808,
89
  "step": 60
90
  },
91
  {
92
  "epoch": 0.6,
93
- "eval_loss": 0.6514347195625305,
94
- "eval_runtime": 19.745,
95
- "eval_samples_per_second": 3.241,
96
- "eval_steps_per_second": 0.101,
97
- "eval_wer": 63.55311355311355,
98
  "step": 60
99
  },
100
  {
101
  "epoch": 0.7,
102
- "learning_rate": 3.473684210526316e-05,
103
- "loss": 0.6826,
104
  "step": 70
105
  },
106
  {
107
  "epoch": 0.7,
108
- "eval_loss": 0.6197173595428467,
109
- "eval_runtime": 19.7014,
110
- "eval_samples_per_second": 3.248,
111
- "eval_steps_per_second": 0.102,
112
- "eval_wer": 60.43956043956044,
113
  "step": 70
114
  },
115
  {
116
  "epoch": 0.8,
117
- "learning_rate": 2.4210526315789474e-05,
118
- "loss": 0.7832,
119
  "step": 80
120
  },
121
  {
122
  "epoch": 0.8,
123
- "eval_loss": 0.6129069328308105,
124
- "eval_runtime": 18.9507,
125
- "eval_samples_per_second": 3.377,
126
- "eval_steps_per_second": 0.106,
127
- "eval_wer": 65.93406593406593,
128
  "step": 80
129
  },
130
  {
131
  "epoch": 0.9,
132
- "learning_rate": 1.3684210526315791e-05,
133
- "loss": 0.6031,
134
  "step": 90
135
  },
136
  {
137
  "epoch": 0.9,
138
- "eval_loss": 0.5876654982566833,
139
- "eval_runtime": 20.4075,
140
- "eval_samples_per_second": 3.136,
141
- "eval_steps_per_second": 0.098,
142
- "eval_wer": 61.35531135531136,
143
  "step": 90
144
  },
145
  {
146
  "epoch": 1.0,
147
- "learning_rate": 3.1578947368421056e-06,
148
- "loss": 0.6678,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 1.0,
153
- "eval_loss": 0.5758526921272278,
154
- "eval_runtime": 19.5593,
155
- "eval_samples_per_second": 3.272,
156
- "eval_steps_per_second": 0.102,
157
- "eval_wer": 61.53846153846154,
158
- "step": 100
159
- },
160
- {
161
- "epoch": 0.07,
162
- "learning_rate": 2.96551724137931e-05,
163
- "loss": 0.4611,
164
- "step": 110
165
- },
166
- {
167
- "epoch": 0.07,
168
- "eval_loss": 0.5625213384628296,
169
- "eval_runtime": 19.7626,
170
- "eval_samples_per_second": 3.238,
171
- "eval_steps_per_second": 0.101,
172
- "eval_wer": 57.692307692307686,
173
- "step": 110
174
- },
175
- {
176
- "epoch": 0.13,
177
- "learning_rate": 2.2758620689655175e-05,
178
- "loss": 0.4451,
179
- "step": 120
180
- },
181
- {
182
- "epoch": 0.13,
183
- "eval_loss": 0.563637375831604,
184
- "eval_runtime": 19.0995,
185
- "eval_samples_per_second": 3.351,
186
- "eval_steps_per_second": 0.105,
187
- "eval_wer": 56.59340659340659,
188
- "step": 120
189
- },
190
- {
191
- "epoch": 0.2,
192
- "learning_rate": 1.586206896551724e-05,
193
- "loss": 0.3615,
194
- "step": 130
195
- },
196
- {
197
- "epoch": 0.2,
198
- "eval_loss": 0.5489554405212402,
199
- "eval_runtime": 18.5694,
200
- "eval_samples_per_second": 3.447,
201
- "eval_steps_per_second": 0.108,
202
- "eval_wer": 61.172161172161175,
203
- "step": 130
204
- },
205
- {
206
- "epoch": 0.27,
207
- "learning_rate": 8.96551724137931e-06,
208
- "loss": 0.4055,
209
- "step": 140
210
- },
211
- {
212
- "epoch": 0.27,
213
- "eval_loss": 0.5382302403450012,
214
- "eval_runtime": 22.4274,
215
- "eval_samples_per_second": 2.854,
216
- "eval_steps_per_second": 0.089,
217
- "eval_wer": 55.12820512820513,
218
- "step": 140
219
- },
220
- {
221
- "epoch": 0.33,
222
- "learning_rate": 2.0689655172413796e-06,
223
- "loss": 0.2946,
224
- "step": 150
225
- },
226
- {
227
- "epoch": 0.33,
228
- "eval_loss": 0.53872150182724,
229
- "eval_runtime": 20.4177,
230
- "eval_samples_per_second": 3.135,
231
- "eval_steps_per_second": 0.098,
232
- "eval_wer": 55.67765567765568,
233
- "step": 150
234
- },
235
- {
236
- "epoch": 0.33,
237
- "step": 150,
238
- "total_flos": 1.18170648576e+17,
239
- "train_loss": 0.13119232177734375,
240
- "train_runtime": 451.4438,
241
- "train_samples_per_second": 10.633,
242
- "train_steps_per_second": 0.332
243
- },
244
- {
245
- "epoch": 0.05,
246
- "learning_rate": 9.5e-06,
247
- "loss": 0.2716,
248
- "step": 160
249
- },
250
- {
251
- "epoch": 0.05,
252
- "eval_loss": 0.5364237427711487,
253
- "eval_runtime": 16.1176,
254
- "eval_samples_per_second": 3.971,
255
- "eval_steps_per_second": 0.124,
256
- "eval_wer": 54.57875457875458,
257
- "step": 160
258
- },
259
- {
260
- "epoch": 0.1,
261
- "learning_rate": 9.050000000000001e-06,
262
- "loss": 0.2765,
263
- "step": 170
264
- },
265
- {
266
- "epoch": 0.1,
267
- "eval_loss": 0.5404230952262878,
268
- "eval_runtime": 17.8451,
269
- "eval_samples_per_second": 3.586,
270
- "eval_steps_per_second": 0.112,
271
- "eval_wer": 54.761904761904766,
272
- "step": 170
273
- },
274
- {
275
- "epoch": 0.15,
276
- "learning_rate": 8.550000000000001e-06,
277
- "loss": 0.2533,
278
- "step": 180
279
- },
280
- {
281
- "epoch": 0.15,
282
- "eval_loss": 0.53352290391922,
283
- "eval_runtime": 17.1042,
284
- "eval_samples_per_second": 3.742,
285
- "eval_steps_per_second": 0.117,
286
- "eval_wer": 55.12820512820513,
287
- "step": 180
288
- },
289
- {
290
- "epoch": 0.2,
291
- "learning_rate": 8.050000000000001e-06,
292
- "loss": 0.3533,
293
- "step": 190
294
- },
295
- {
296
- "epoch": 0.2,
297
- "eval_loss": 0.530021071434021,
298
- "eval_runtime": 18.1912,
299
- "eval_samples_per_second": 3.518,
300
- "eval_steps_per_second": 0.11,
301
- "eval_wer": 56.59340659340659,
302
- "step": 190
303
- },
304
- {
305
- "epoch": 0.25,
306
- "learning_rate": 7.5500000000000006e-06,
307
- "loss": 0.2844,
308
- "step": 200
309
- },
310
- {
311
- "epoch": 0.25,
312
- "eval_loss": 0.5341857671737671,
313
- "eval_runtime": 17.7172,
314
- "eval_samples_per_second": 3.612,
315
- "eval_steps_per_second": 0.113,
316
- "eval_wer": 55.311355311355314,
317
- "step": 200
318
- },
319
- {
320
- "epoch": 0.03,
321
- "learning_rate": 8.033333333333335e-06,
322
- "loss": 0.1922,
323
- "step": 210
324
- },
325
- {
326
- "epoch": 0.03,
327
- "eval_loss": 0.5239479541778564,
328
- "eval_runtime": 16.5572,
329
- "eval_samples_per_second": 3.865,
330
- "eval_steps_per_second": 0.121,
331
- "eval_wer": 55.494505494505496,
332
- "step": 210
333
- },
334
- {
335
- "epoch": 0.07,
336
- "learning_rate": 7.7e-06,
337
- "loss": 0.2229,
338
- "step": 220
339
- },
340
- {
341
- "epoch": 0.07,
342
- "eval_loss": 0.5312361717224121,
343
- "eval_runtime": 17.8694,
344
- "eval_samples_per_second": 3.582,
345
- "eval_steps_per_second": 0.112,
346
- "eval_wer": 54.395604395604394,
347
- "step": 220
348
- },
349
- {
350
- "epoch": 0.1,
351
- "learning_rate": 7.3666666666666676e-06,
352
- "loss": 0.1976,
353
- "step": 230
354
- },
355
- {
356
- "epoch": 0.1,
357
- "eval_loss": 0.5302589535713196,
358
- "eval_runtime": 17.0912,
359
- "eval_samples_per_second": 3.745,
360
- "eval_steps_per_second": 0.117,
361
- "eval_wer": 54.02930402930403,
362
- "step": 230
363
- },
364
- {
365
- "epoch": 0.13,
366
- "learning_rate": 7.033333333333334e-06,
367
- "loss": 0.2823,
368
- "step": 240
369
- },
370
- {
371
- "epoch": 0.13,
372
- "eval_loss": 0.5269189476966858,
373
- "eval_runtime": 17.9989,
374
- "eval_samples_per_second": 3.556,
375
- "eval_steps_per_second": 0.111,
376
- "eval_wer": 54.02930402930403,
377
- "step": 240
378
- },
379
- {
380
- "epoch": 0.17,
381
- "learning_rate": 6.700000000000001e-06,
382
- "loss": 0.2265,
383
- "step": 250
384
- },
385
- {
386
- "epoch": 0.17,
387
- "eval_loss": 0.5312862992286682,
388
- "eval_runtime": 18.4593,
389
- "eval_samples_per_second": 3.467,
390
  "eval_steps_per_second": 0.108,
391
- "eval_wer": 55.67765567765568,
392
- "step": 250
393
- },
394
- {
395
- "epoch": 0.2,
396
- "learning_rate": 6.366666666666668e-06,
397
- "loss": 0.3728,
398
- "step": 260
399
- },
400
- {
401
- "epoch": 0.2,
402
- "eval_loss": 0.5128015279769897,
403
- "eval_runtime": 18.2249,
404
- "eval_samples_per_second": 3.512,
405
- "eval_steps_per_second": 0.11,
406
- "eval_wer": 53.47985347985348,
407
- "step": 260
408
- },
409
- {
410
- "epoch": 0.23,
411
- "learning_rate": 6.033333333333335e-06,
412
- "loss": 0.3738,
413
- "step": 270
414
- },
415
- {
416
- "epoch": 0.23,
417
- "eval_loss": 0.5025143623352051,
418
- "eval_runtime": 17.2543,
419
- "eval_samples_per_second": 3.709,
420
- "eval_steps_per_second": 0.116,
421
- "eval_wer": 52.74725274725275,
422
- "step": 270
423
- },
424
- {
425
- "epoch": 0.27,
426
- "learning_rate": 5.7e-06,
427
- "loss": 0.488,
428
- "step": 280
429
- },
430
- {
431
- "epoch": 0.27,
432
- "eval_loss": 0.5074095726013184,
433
- "eval_runtime": 18.4062,
434
- "eval_samples_per_second": 3.477,
435
- "eval_steps_per_second": 0.109,
436
- "eval_wer": 52.197802197802204,
437
- "step": 280
438
- },
439
- {
440
- "epoch": 0.3,
441
- "learning_rate": 5.366666666666666e-06,
442
- "loss": 0.4142,
443
- "step": 290
444
- },
445
- {
446
- "epoch": 0.3,
447
- "eval_loss": 0.5057792663574219,
448
- "eval_runtime": 16.5894,
449
- "eval_samples_per_second": 3.858,
450
- "eval_steps_per_second": 0.121,
451
- "eval_wer": 52.56410256410257,
452
- "step": 290
453
- },
454
- {
455
- "epoch": 0.33,
456
- "learning_rate": 5.033333333333333e-06,
457
- "loss": 0.4407,
458
- "step": 300
459
- },
460
- {
461
- "epoch": 0.33,
462
- "eval_loss": 0.5046072602272034,
463
- "eval_runtime": 17.9261,
464
- "eval_samples_per_second": 3.57,
465
- "eval_steps_per_second": 0.112,
466
- "eval_wer": 53.11355311355312,
467
- "step": 300
468
  },
469
  {
470
- "epoch": 0.33,
471
- "step": 300,
472
- "total_flos": 2.36341297152e+17,
473
- "train_loss": 0.10702953418095906,
474
- "train_runtime": 833.6007,
475
- "train_samples_per_second": 11.516,
476
- "train_steps_per_second": 0.36
477
  }
478
  ],
479
- "max_steps": 300,
480
  "num_train_epochs": 9223372036854775807,
481
- "total_flos": 2.36341297152e+17,
482
  "trial_name": null,
483
  "trial_params": null
484
  }
 
1
  {
2
+ "best_metric": 60.07326007326007,
3
+ "best_model_checkpoint": "./checkpoint-70",
4
+ "epoch": 1.0,
5
+ "global_step": 100,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.1,
12
+ "learning_rate": 7e-05,
13
+ "loss": 2.5622,
14
  "step": 10
15
  },
16
  {
17
+ "epoch": 0.1,
18
+ "eval_loss": 1.5401501655578613,
19
+ "eval_runtime": 62.3188,
20
+ "eval_samples_per_second": 1.027,
21
+ "eval_steps_per_second": 0.032,
22
+ "eval_wer": 94.5054945054945,
23
  "step": 10
24
  },
25
  {
26
+ "epoch": 0.2,
27
+ "learning_rate": 9.222222222222223e-05,
28
+ "loss": 1.3719,
29
  "step": 20
30
  },
31
  {
32
+ "epoch": 0.2,
33
+ "eval_loss": 1.001212239265442,
34
+ "eval_runtime": 18.9902,
35
+ "eval_samples_per_second": 3.37,
36
+ "eval_steps_per_second": 0.105,
37
  "eval_wer": 75.27472527472527,
38
  "step": 20
39
  },
40
  {
41
  "epoch": 0.3,
42
+ "learning_rate": 8.111111111111112e-05,
43
+ "loss": 0.9898,
44
  "step": 30
45
  },
46
  {
47
  "epoch": 0.3,
48
+ "eval_loss": 0.8217034339904785,
49
+ "eval_runtime": 17.7847,
50
+ "eval_samples_per_second": 3.599,
51
+ "eval_steps_per_second": 0.112,
52
+ "eval_wer": 72.7106227106227,
53
  "step": 30
54
  },
55
  {
56
  "epoch": 0.4,
57
+ "learning_rate": 7e-05,
58
+ "loss": 0.9742,
59
  "step": 40
60
  },
61
  {
62
  "epoch": 0.4,
63
+ "eval_loss": 0.7924289107322693,
64
+ "eval_runtime": 19.063,
65
+ "eval_samples_per_second": 3.357,
66
+ "eval_steps_per_second": 0.105,
67
+ "eval_wer": 72.52747252747253,
68
  "step": 40
69
  },
70
  {
71
  "epoch": 0.5,
72
+ "learning_rate": 5.8888888888888896e-05,
73
+ "loss": 0.6951,
74
  "step": 50
75
  },
76
  {
77
  "epoch": 0.5,
78
+ "eval_loss": 0.7628086805343628,
79
+ "eval_runtime": 17.8327,
80
+ "eval_samples_per_second": 3.589,
81
+ "eval_steps_per_second": 0.112,
82
+ "eval_wer": 76.19047619047619,
83
  "step": 50
84
  },
85
  {
86
  "epoch": 0.6,
87
+ "learning_rate": 4.7777777777777784e-05,
88
+ "loss": 0.7824,
89
  "step": 60
90
  },
91
  {
92
  "epoch": 0.6,
93
+ "eval_loss": 0.6737741231918335,
94
+ "eval_runtime": 18.8876,
95
+ "eval_samples_per_second": 3.388,
96
+ "eval_steps_per_second": 0.106,
97
+ "eval_wer": 65.38461538461539,
98
  "step": 60
99
  },
100
  {
101
  "epoch": 0.7,
102
+ "learning_rate": 3.6666666666666666e-05,
103
+ "loss": 0.6818,
104
  "step": 70
105
  },
106
  {
107
  "epoch": 0.7,
108
+ "eval_loss": 0.6388838291168213,
109
+ "eval_runtime": 18.9502,
110
+ "eval_samples_per_second": 3.377,
111
+ "eval_steps_per_second": 0.106,
112
+ "eval_wer": 60.07326007326007,
113
  "step": 70
114
  },
115
  {
116
  "epoch": 0.8,
117
+ "learning_rate": 2.5555555555555554e-05,
118
+ "loss": 0.7823,
119
  "step": 80
120
  },
121
  {
122
  "epoch": 0.8,
123
+ "eval_loss": 0.6208388805389404,
124
+ "eval_runtime": 18.1881,
125
+ "eval_samples_per_second": 3.519,
126
+ "eval_steps_per_second": 0.11,
127
+ "eval_wer": 65.75091575091575,
128
  "step": 80
129
  },
130
  {
131
  "epoch": 0.9,
132
+ "learning_rate": 1.4444444444444444e-05,
133
+ "loss": 0.5994,
134
  "step": 90
135
  },
136
  {
137
  "epoch": 0.9,
138
+ "eval_loss": 0.5900620818138123,
139
+ "eval_runtime": 17.489,
140
+ "eval_samples_per_second": 3.659,
141
+ "eval_steps_per_second": 0.114,
142
+ "eval_wer": 61.904761904761905,
143
  "step": 90
144
  },
145
  {
146
  "epoch": 1.0,
147
+ "learning_rate": 3.3333333333333333e-06,
148
+ "loss": 0.6647,
149
  "step": 100
150
  },
151
  {
152
  "epoch": 1.0,
153
+ "eval_loss": 0.5789934992790222,
154
+ "eval_runtime": 18.4962,
155
+ "eval_samples_per_second": 3.46,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  "eval_steps_per_second": 0.108,
157
+ "eval_wer": 61.72161172161172,
158
+ "step": 100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  },
160
  {
161
+ "epoch": 1.0,
162
+ "step": 100,
163
+ "total_flos": 7.8780432384e+16,
164
+ "train_loss": 1.0103698587417602,
165
+ "train_runtime": 873.4716,
166
+ "train_samples_per_second": 3.664,
167
+ "train_steps_per_second": 0.114
168
  }
169
  ],
170
+ "max_steps": 100,
171
  "num_train_epochs": 9223372036854775807,
172
+ "total_flos": 7.8780432384e+16,
173
  "trial_name": null,
174
  "trial_params": null
175
  }