Safetensors
wav2vec2-bert
indiejoseph commited on
Commit
1f89a01
·
verified ·
1 Parent(s): 1e0eb31

Upload folder using huggingface_hub

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "facebook/w2v-bert-2.0",
3
  "activation_dropout": 0.0,
4
  "adapter_act": "relu",
5
  "adapter_kernel_size": 3,
@@ -75,7 +75,7 @@
75
  ],
76
  "tone_vocab_size": 11,
77
  "torch_dtype": "float32",
78
- "transformers_version": "4.46.0",
79
  "use_intermediate_ffn_before_adapter": false,
80
  "use_weighted_layer_sum": false,
81
  "vocab_size": 77,
 
1
  {
2
+ "_name_or_path": "hon9kon9ize/wav2vec2bert-jyutping",
3
  "activation_dropout": 0.0,
4
  "adapter_act": "relu",
5
  "adapter_kernel_size": 3,
 
75
  ],
76
  "tone_vocab_size": 11,
77
  "torch_dtype": "float32",
78
+ "transformers_version": "4.46.1",
79
  "use_intermediate_ffn_before_adapter": false,
80
  "use_weighted_layer_sum": false,
81
  "vocab_size": 77,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3644f073720a83f1f6d62e8f540c41143590e160d6e76b60bbef55cbc9ab9527
3
  size 2423167328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b68dcd1a36919d35d72b757b0ca6c65a0583f40e9ad4487899b0f5be73c2bee
3
  size 2423167328
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:accc05a8c704ac5f4dd34e52ac9ee8fd9f480b31ecd7816f5f047e17bc979032
3
+ size 4846796656
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8bd5e5f909eb40d3d4de11c5df93a9ebe96df61c70e0a9177f933a0f30ebb074
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4c8114632b7b23c6ef3e5b14efd79a6a14017d51b4413b6692ec871a255c3cd
3
+ size 1064
tokenizer_config.json CHANGED
@@ -18,585 +18,585 @@
18
  },
19
  "2": {
20
  "content": "|",
21
- "lstrip": false,
22
- "normalized": true,
23
- "rstrip": false,
24
  "single_word": false,
25
  "special": false
26
  },
27
  "3": {
28
  "content": "aa",
29
- "lstrip": false,
30
- "normalized": true,
31
- "rstrip": false,
32
  "single_word": false,
33
  "special": false
34
  },
35
  "4": {
36
  "content": "aai",
37
- "lstrip": false,
38
- "normalized": true,
39
- "rstrip": false,
40
  "single_word": false,
41
  "special": false
42
  },
43
  "5": {
44
  "content": "aak",
45
- "lstrip": false,
46
- "normalized": true,
47
- "rstrip": false,
48
  "single_word": false,
49
  "special": false
50
  },
51
  "6": {
52
  "content": "aam",
53
- "lstrip": false,
54
- "normalized": true,
55
- "rstrip": false,
56
  "single_word": false,
57
  "special": false
58
  },
59
  "7": {
60
  "content": "aan",
61
- "lstrip": false,
62
- "normalized": true,
63
- "rstrip": false,
64
  "single_word": false,
65
  "special": false
66
  },
67
  "8": {
68
  "content": "aang",
69
- "lstrip": false,
70
- "normalized": true,
71
- "rstrip": false,
72
  "single_word": false,
73
  "special": false
74
  },
75
  "9": {
76
  "content": "aap",
77
- "lstrip": false,
78
- "normalized": true,
79
- "rstrip": false,
80
  "single_word": false,
81
  "special": false
82
  },
83
  "10": {
84
  "content": "aat",
85
- "lstrip": false,
86
- "normalized": true,
87
- "rstrip": false,
88
  "single_word": false,
89
  "special": false
90
  },
91
  "11": {
92
  "content": "aau",
93
- "lstrip": false,
94
- "normalized": true,
95
- "rstrip": false,
96
  "single_word": false,
97
  "special": false
98
  },
99
  "12": {
100
  "content": "ai",
101
- "lstrip": false,
102
- "normalized": true,
103
- "rstrip": false,
104
  "single_word": false,
105
  "special": false
106
  },
107
  "13": {
108
  "content": "ak",
109
- "lstrip": false,
110
- "normalized": true,
111
- "rstrip": false,
112
  "single_word": false,
113
  "special": false
114
  },
115
  "14": {
116
  "content": "am",
117
- "lstrip": false,
118
- "normalized": true,
119
- "rstrip": false,
120
  "single_word": false,
121
  "special": false
122
  },
123
  "15": {
124
  "content": "an",
125
- "lstrip": false,
126
- "normalized": true,
127
- "rstrip": false,
128
  "single_word": false,
129
  "special": false
130
  },
131
  "16": {
132
  "content": "ang",
133
- "lstrip": false,
134
- "normalized": true,
135
- "rstrip": false,
136
  "single_word": false,
137
  "special": false
138
  },
139
  "17": {
140
  "content": "ap",
141
- "lstrip": false,
142
- "normalized": true,
143
- "rstrip": false,
144
  "single_word": false,
145
  "special": false
146
  },
147
  "18": {
148
  "content": "at",
149
- "lstrip": false,
150
- "normalized": true,
151
- "rstrip": false,
152
  "single_word": false,
153
  "special": false
154
  },
155
  "19": {
156
  "content": "au",
157
- "lstrip": false,
158
- "normalized": true,
159
- "rstrip": false,
160
  "single_word": false,
161
  "special": false
162
  },
163
  "20": {
164
  "content": "b",
165
- "lstrip": false,
166
- "normalized": true,
167
- "rstrip": false,
168
  "single_word": false,
169
  "special": false
170
  },
171
  "21": {
172
  "content": "c",
173
- "lstrip": false,
174
- "normalized": true,
175
- "rstrip": false,
176
  "single_word": false,
177
  "special": false
178
  },
179
  "22": {
180
  "content": "d",
181
- "lstrip": false,
182
- "normalized": true,
183
- "rstrip": false,
184
  "single_word": false,
185
  "special": false
186
  },
187
  "23": {
188
  "content": "e",
189
- "lstrip": false,
190
- "normalized": true,
191
- "rstrip": false,
192
  "single_word": false,
193
  "special": false
194
  },
195
  "24": {
196
  "content": "ei",
197
- "lstrip": false,
198
- "normalized": true,
199
- "rstrip": false,
200
  "single_word": false,
201
  "special": false
202
  },
203
  "25": {
204
  "content": "ek",
205
- "lstrip": false,
206
- "normalized": true,
207
- "rstrip": false,
208
  "single_word": false,
209
  "special": false
210
  },
211
  "26": {
212
  "content": "eng",
213
- "lstrip": false,
214
- "normalized": true,
215
- "rstrip": false,
216
  "single_word": false,
217
  "special": false
218
  },
219
  "27": {
220
  "content": "eoi",
221
- "lstrip": false,
222
- "normalized": true,
223
- "rstrip": false,
224
  "single_word": false,
225
  "special": false
226
  },
227
  "28": {
228
  "content": "eon",
229
- "lstrip": false,
230
- "normalized": true,
231
- "rstrip": false,
232
  "single_word": false,
233
  "special": false
234
  },
235
  "29": {
236
  "content": "eot",
237
- "lstrip": false,
238
- "normalized": true,
239
- "rstrip": false,
240
  "single_word": false,
241
  "special": false
242
  },
243
  "30": {
244
  "content": "ep",
245
- "lstrip": false,
246
- "normalized": true,
247
- "rstrip": false,
248
  "single_word": false,
249
  "special": false
250
  },
251
  "31": {
252
  "content": "eu",
253
- "lstrip": false,
254
- "normalized": true,
255
- "rstrip": false,
256
  "single_word": false,
257
  "special": false
258
  },
259
  "32": {
260
  "content": "f",
261
- "lstrip": false,
262
- "normalized": true,
263
- "rstrip": false,
264
  "single_word": false,
265
  "special": false
266
  },
267
  "33": {
268
  "content": "g",
269
- "lstrip": false,
270
- "normalized": true,
271
- "rstrip": false,
272
  "single_word": false,
273
  "special": false
274
  },
275
  "34": {
276
  "content": "gw",
277
- "lstrip": false,
278
- "normalized": true,
279
- "rstrip": false,
280
  "single_word": false,
281
  "special": false
282
  },
283
  "35": {
284
  "content": "h",
285
- "lstrip": false,
286
- "normalized": true,
287
- "rstrip": false,
288
  "single_word": false,
289
  "special": false
290
  },
291
  "36": {
292
  "content": "i",
293
- "lstrip": false,
294
- "normalized": true,
295
- "rstrip": false,
296
  "single_word": false,
297
  "special": false
298
  },
299
  "37": {
300
  "content": "ik",
301
- "lstrip": false,
302
- "normalized": true,
303
- "rstrip": false,
304
  "single_word": false,
305
  "special": false
306
  },
307
  "38": {
308
  "content": "im",
309
- "lstrip": false,
310
- "normalized": true,
311
- "rstrip": false,
312
  "single_word": false,
313
  "special": false
314
  },
315
  "39": {
316
  "content": "in",
317
- "lstrip": false,
318
- "normalized": true,
319
- "rstrip": false,
320
  "single_word": false,
321
  "special": false
322
  },
323
  "40": {
324
  "content": "ing",
325
- "lstrip": false,
326
- "normalized": true,
327
- "rstrip": false,
328
  "single_word": false,
329
  "special": false
330
  },
331
  "41": {
332
  "content": "ip",
333
- "lstrip": false,
334
- "normalized": true,
335
- "rstrip": false,
336
  "single_word": false,
337
  "special": false
338
  },
339
  "42": {
340
  "content": "it",
341
- "lstrip": false,
342
- "normalized": true,
343
- "rstrip": false,
344
  "single_word": false,
345
  "special": false
346
  },
347
  "43": {
348
  "content": "iu",
349
- "lstrip": false,
350
- "normalized": true,
351
- "rstrip": false,
352
  "single_word": false,
353
  "special": false
354
  },
355
  "44": {
356
  "content": "j",
357
- "lstrip": false,
358
- "normalized": true,
359
- "rstrip": false,
360
  "single_word": false,
361
  "special": false
362
  },
363
  "45": {
364
  "content": "k",
365
- "lstrip": false,
366
- "normalized": true,
367
- "rstrip": false,
368
  "single_word": false,
369
  "special": false
370
  },
371
  "46": {
372
  "content": "kw",
373
- "lstrip": false,
374
- "normalized": true,
375
- "rstrip": false,
376
  "single_word": false,
377
  "special": false
378
  },
379
  "47": {
380
  "content": "l",
381
- "lstrip": false,
382
- "normalized": true,
383
- "rstrip": false,
384
  "single_word": false,
385
  "special": false
386
  },
387
  "48": {
388
  "content": "m",
389
- "lstrip": false,
390
- "normalized": true,
391
- "rstrip": false,
392
  "single_word": false,
393
  "special": false
394
  },
395
  "49": {
396
  "content": "n",
397
- "lstrip": false,
398
- "normalized": true,
399
- "rstrip": false,
400
  "single_word": false,
401
  "special": false
402
  },
403
  "50": {
404
  "content": "ng",
405
- "lstrip": false,
406
- "normalized": true,
407
- "rstrip": false,
408
  "single_word": false,
409
  "special": false
410
  },
411
  "51": {
412
  "content": "o",
413
- "lstrip": false,
414
- "normalized": true,
415
- "rstrip": false,
416
  "single_word": false,
417
  "special": false
418
  },
419
  "52": {
420
  "content": "oe",
421
- "lstrip": false,
422
- "normalized": true,
423
- "rstrip": false,
424
  "single_word": false,
425
  "special": false
426
  },
427
  "53": {
428
  "content": "oek",
429
- "lstrip": false,
430
- "normalized": true,
431
- "rstrip": false,
432
  "single_word": false,
433
  "special": false
434
  },
435
  "54": {
436
  "content": "oeng",
437
- "lstrip": false,
438
- "normalized": true,
439
- "rstrip": false,
440
  "single_word": false,
441
  "special": false
442
  },
443
  "55": {
444
  "content": "oi",
445
- "lstrip": false,
446
- "normalized": true,
447
- "rstrip": false,
448
  "single_word": false,
449
  "special": false
450
  },
451
  "56": {
452
  "content": "ok",
453
- "lstrip": false,
454
- "normalized": true,
455
- "rstrip": false,
456
  "single_word": false,
457
  "special": false
458
  },
459
  "57": {
460
  "content": "on",
461
- "lstrip": false,
462
- "normalized": true,
463
- "rstrip": false,
464
  "single_word": false,
465
  "special": false
466
  },
467
  "58": {
468
  "content": "ong",
469
- "lstrip": false,
470
- "normalized": true,
471
- "rstrip": false,
472
  "single_word": false,
473
  "special": false
474
  },
475
  "59": {
476
  "content": "ot",
477
- "lstrip": false,
478
- "normalized": true,
479
- "rstrip": false,
480
  "single_word": false,
481
  "special": false
482
  },
483
  "60": {
484
  "content": "ou",
485
- "lstrip": false,
486
- "normalized": true,
487
- "rstrip": false,
488
  "single_word": false,
489
  "special": false
490
  },
491
  "61": {
492
  "content": "p",
493
- "lstrip": false,
494
- "normalized": true,
495
- "rstrip": false,
496
  "single_word": false,
497
  "special": false
498
  },
499
  "62": {
500
  "content": "s",
501
- "lstrip": false,
502
- "normalized": true,
503
- "rstrip": false,
504
  "single_word": false,
505
  "special": false
506
  },
507
  "63": {
508
  "content": "t",
509
- "lstrip": false,
510
- "normalized": true,
511
- "rstrip": false,
512
  "single_word": false,
513
  "special": false
514
  },
515
  "64": {
516
  "content": "u",
517
- "lstrip": false,
518
- "normalized": true,
519
- "rstrip": false,
520
  "single_word": false,
521
  "special": false
522
  },
523
  "65": {
524
  "content": "ui",
525
- "lstrip": false,
526
- "normalized": true,
527
- "rstrip": false,
528
  "single_word": false,
529
  "special": false
530
  },
531
  "66": {
532
  "content": "uk",
533
- "lstrip": false,
534
- "normalized": true,
535
- "rstrip": false,
536
  "single_word": false,
537
  "special": false
538
  },
539
  "67": {
540
  "content": "un",
541
- "lstrip": false,
542
- "normalized": true,
543
- "rstrip": false,
544
  "single_word": false,
545
  "special": false
546
  },
547
  "68": {
548
  "content": "ung",
549
- "lstrip": false,
550
- "normalized": true,
551
- "rstrip": false,
552
  "single_word": false,
553
  "special": false
554
  },
555
  "69": {
556
  "content": "ut",
557
- "lstrip": false,
558
- "normalized": true,
559
- "rstrip": false,
560
  "single_word": false,
561
  "special": false
562
  },
563
  "70": {
564
  "content": "w",
565
- "lstrip": false,
566
- "normalized": true,
567
- "rstrip": false,
568
  "single_word": false,
569
  "special": false
570
  },
571
  "71": {
572
  "content": "yu",
573
- "lstrip": false,
574
- "normalized": true,
575
- "rstrip": false,
576
  "single_word": false,
577
  "special": false
578
  },
579
  "72": {
580
  "content": "yun",
581
- "lstrip": false,
582
- "normalized": true,
583
- "rstrip": false,
584
  "single_word": false,
585
  "special": false
586
  },
587
  "73": {
588
  "content": "yut",
589
- "lstrip": false,
590
- "normalized": true,
591
- "rstrip": false,
592
  "single_word": false,
593
  "special": false
594
  },
595
  "74": {
596
  "content": "z",
597
- "lstrip": false,
598
- "normalized": true,
599
- "rstrip": false,
600
  "single_word": false,
601
  "special": false
602
  },
@@ -629,4 +629,4 @@
629
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
630
  "unk_token": "[UNK]",
631
  "word_delimiter_token": "|"
632
- }
 
18
  },
19
  "2": {
20
  "content": "|",
21
+ "lstrip": true,
22
+ "normalized": false,
23
+ "rstrip": true,
24
  "single_word": false,
25
  "special": false
26
  },
27
  "3": {
28
  "content": "aa",
29
+ "lstrip": true,
30
+ "normalized": false,
31
+ "rstrip": true,
32
  "single_word": false,
33
  "special": false
34
  },
35
  "4": {
36
  "content": "aai",
37
+ "lstrip": true,
38
+ "normalized": false,
39
+ "rstrip": true,
40
  "single_word": false,
41
  "special": false
42
  },
43
  "5": {
44
  "content": "aak",
45
+ "lstrip": true,
46
+ "normalized": false,
47
+ "rstrip": true,
48
  "single_word": false,
49
  "special": false
50
  },
51
  "6": {
52
  "content": "aam",
53
+ "lstrip": true,
54
+ "normalized": false,
55
+ "rstrip": true,
56
  "single_word": false,
57
  "special": false
58
  },
59
  "7": {
60
  "content": "aan",
61
+ "lstrip": true,
62
+ "normalized": false,
63
+ "rstrip": true,
64
  "single_word": false,
65
  "special": false
66
  },
67
  "8": {
68
  "content": "aang",
69
+ "lstrip": true,
70
+ "normalized": false,
71
+ "rstrip": true,
72
  "single_word": false,
73
  "special": false
74
  },
75
  "9": {
76
  "content": "aap",
77
+ "lstrip": true,
78
+ "normalized": false,
79
+ "rstrip": true,
80
  "single_word": false,
81
  "special": false
82
  },
83
  "10": {
84
  "content": "aat",
85
+ "lstrip": true,
86
+ "normalized": false,
87
+ "rstrip": true,
88
  "single_word": false,
89
  "special": false
90
  },
91
  "11": {
92
  "content": "aau",
93
+ "lstrip": true,
94
+ "normalized": false,
95
+ "rstrip": true,
96
  "single_word": false,
97
  "special": false
98
  },
99
  "12": {
100
  "content": "ai",
101
+ "lstrip": true,
102
+ "normalized": false,
103
+ "rstrip": true,
104
  "single_word": false,
105
  "special": false
106
  },
107
  "13": {
108
  "content": "ak",
109
+ "lstrip": true,
110
+ "normalized": false,
111
+ "rstrip": true,
112
  "single_word": false,
113
  "special": false
114
  },
115
  "14": {
116
  "content": "am",
117
+ "lstrip": true,
118
+ "normalized": false,
119
+ "rstrip": true,
120
  "single_word": false,
121
  "special": false
122
  },
123
  "15": {
124
  "content": "an",
125
+ "lstrip": true,
126
+ "normalized": false,
127
+ "rstrip": true,
128
  "single_word": false,
129
  "special": false
130
  },
131
  "16": {
132
  "content": "ang",
133
+ "lstrip": true,
134
+ "normalized": false,
135
+ "rstrip": true,
136
  "single_word": false,
137
  "special": false
138
  },
139
  "17": {
140
  "content": "ap",
141
+ "lstrip": true,
142
+ "normalized": false,
143
+ "rstrip": true,
144
  "single_word": false,
145
  "special": false
146
  },
147
  "18": {
148
  "content": "at",
149
+ "lstrip": true,
150
+ "normalized": false,
151
+ "rstrip": true,
152
  "single_word": false,
153
  "special": false
154
  },
155
  "19": {
156
  "content": "au",
157
+ "lstrip": true,
158
+ "normalized": false,
159
+ "rstrip": true,
160
  "single_word": false,
161
  "special": false
162
  },
163
  "20": {
164
  "content": "b",
165
+ "lstrip": true,
166
+ "normalized": false,
167
+ "rstrip": true,
168
  "single_word": false,
169
  "special": false
170
  },
171
  "21": {
172
  "content": "c",
173
+ "lstrip": true,
174
+ "normalized": false,
175
+ "rstrip": true,
176
  "single_word": false,
177
  "special": false
178
  },
179
  "22": {
180
  "content": "d",
181
+ "lstrip": true,
182
+ "normalized": false,
183
+ "rstrip": true,
184
  "single_word": false,
185
  "special": false
186
  },
187
  "23": {
188
  "content": "e",
189
+ "lstrip": true,
190
+ "normalized": false,
191
+ "rstrip": true,
192
  "single_word": false,
193
  "special": false
194
  },
195
  "24": {
196
  "content": "ei",
197
+ "lstrip": true,
198
+ "normalized": false,
199
+ "rstrip": true,
200
  "single_word": false,
201
  "special": false
202
  },
203
  "25": {
204
  "content": "ek",
205
+ "lstrip": true,
206
+ "normalized": false,
207
+ "rstrip": true,
208
  "single_word": false,
209
  "special": false
210
  },
211
  "26": {
212
  "content": "eng",
213
+ "lstrip": true,
214
+ "normalized": false,
215
+ "rstrip": true,
216
  "single_word": false,
217
  "special": false
218
  },
219
  "27": {
220
  "content": "eoi",
221
+ "lstrip": true,
222
+ "normalized": false,
223
+ "rstrip": true,
224
  "single_word": false,
225
  "special": false
226
  },
227
  "28": {
228
  "content": "eon",
229
+ "lstrip": true,
230
+ "normalized": false,
231
+ "rstrip": true,
232
  "single_word": false,
233
  "special": false
234
  },
235
  "29": {
236
  "content": "eot",
237
+ "lstrip": true,
238
+ "normalized": false,
239
+ "rstrip": true,
240
  "single_word": false,
241
  "special": false
242
  },
243
  "30": {
244
  "content": "ep",
245
+ "lstrip": true,
246
+ "normalized": false,
247
+ "rstrip": true,
248
  "single_word": false,
249
  "special": false
250
  },
251
  "31": {
252
  "content": "eu",
253
+ "lstrip": true,
254
+ "normalized": false,
255
+ "rstrip": true,
256
  "single_word": false,
257
  "special": false
258
  },
259
  "32": {
260
  "content": "f",
261
+ "lstrip": true,
262
+ "normalized": false,
263
+ "rstrip": true,
264
  "single_word": false,
265
  "special": false
266
  },
267
  "33": {
268
  "content": "g",
269
+ "lstrip": true,
270
+ "normalized": false,
271
+ "rstrip": true,
272
  "single_word": false,
273
  "special": false
274
  },
275
  "34": {
276
  "content": "gw",
277
+ "lstrip": true,
278
+ "normalized": false,
279
+ "rstrip": true,
280
  "single_word": false,
281
  "special": false
282
  },
283
  "35": {
284
  "content": "h",
285
+ "lstrip": true,
286
+ "normalized": false,
287
+ "rstrip": true,
288
  "single_word": false,
289
  "special": false
290
  },
291
  "36": {
292
  "content": "i",
293
+ "lstrip": true,
294
+ "normalized": false,
295
+ "rstrip": true,
296
  "single_word": false,
297
  "special": false
298
  },
299
  "37": {
300
  "content": "ik",
301
+ "lstrip": true,
302
+ "normalized": false,
303
+ "rstrip": true,
304
  "single_word": false,
305
  "special": false
306
  },
307
  "38": {
308
  "content": "im",
309
+ "lstrip": true,
310
+ "normalized": false,
311
+ "rstrip": true,
312
  "single_word": false,
313
  "special": false
314
  },
315
  "39": {
316
  "content": "in",
317
+ "lstrip": true,
318
+ "normalized": false,
319
+ "rstrip": true,
320
  "single_word": false,
321
  "special": false
322
  },
323
  "40": {
324
  "content": "ing",
325
+ "lstrip": true,
326
+ "normalized": false,
327
+ "rstrip": true,
328
  "single_word": false,
329
  "special": false
330
  },
331
  "41": {
332
  "content": "ip",
333
+ "lstrip": true,
334
+ "normalized": false,
335
+ "rstrip": true,
336
  "single_word": false,
337
  "special": false
338
  },
339
  "42": {
340
  "content": "it",
341
+ "lstrip": true,
342
+ "normalized": false,
343
+ "rstrip": true,
344
  "single_word": false,
345
  "special": false
346
  },
347
  "43": {
348
  "content": "iu",
349
+ "lstrip": true,
350
+ "normalized": false,
351
+ "rstrip": true,
352
  "single_word": false,
353
  "special": false
354
  },
355
  "44": {
356
  "content": "j",
357
+ "lstrip": true,
358
+ "normalized": false,
359
+ "rstrip": true,
360
  "single_word": false,
361
  "special": false
362
  },
363
  "45": {
364
  "content": "k",
365
+ "lstrip": true,
366
+ "normalized": false,
367
+ "rstrip": true,
368
  "single_word": false,
369
  "special": false
370
  },
371
  "46": {
372
  "content": "kw",
373
+ "lstrip": true,
374
+ "normalized": false,
375
+ "rstrip": true,
376
  "single_word": false,
377
  "special": false
378
  },
379
  "47": {
380
  "content": "l",
381
+ "lstrip": true,
382
+ "normalized": false,
383
+ "rstrip": true,
384
  "single_word": false,
385
  "special": false
386
  },
387
  "48": {
388
  "content": "m",
389
+ "lstrip": true,
390
+ "normalized": false,
391
+ "rstrip": true,
392
  "single_word": false,
393
  "special": false
394
  },
395
  "49": {
396
  "content": "n",
397
+ "lstrip": true,
398
+ "normalized": false,
399
+ "rstrip": true,
400
  "single_word": false,
401
  "special": false
402
  },
403
  "50": {
404
  "content": "ng",
405
+ "lstrip": true,
406
+ "normalized": false,
407
+ "rstrip": true,
408
  "single_word": false,
409
  "special": false
410
  },
411
  "51": {
412
  "content": "o",
413
+ "lstrip": true,
414
+ "normalized": false,
415
+ "rstrip": true,
416
  "single_word": false,
417
  "special": false
418
  },
419
  "52": {
420
  "content": "oe",
421
+ "lstrip": true,
422
+ "normalized": false,
423
+ "rstrip": true,
424
  "single_word": false,
425
  "special": false
426
  },
427
  "53": {
428
  "content": "oek",
429
+ "lstrip": true,
430
+ "normalized": false,
431
+ "rstrip": true,
432
  "single_word": false,
433
  "special": false
434
  },
435
  "54": {
436
  "content": "oeng",
437
+ "lstrip": true,
438
+ "normalized": false,
439
+ "rstrip": true,
440
  "single_word": false,
441
  "special": false
442
  },
443
  "55": {
444
  "content": "oi",
445
+ "lstrip": true,
446
+ "normalized": false,
447
+ "rstrip": true,
448
  "single_word": false,
449
  "special": false
450
  },
451
  "56": {
452
  "content": "ok",
453
+ "lstrip": true,
454
+ "normalized": false,
455
+ "rstrip": true,
456
  "single_word": false,
457
  "special": false
458
  },
459
  "57": {
460
  "content": "on",
461
+ "lstrip": true,
462
+ "normalized": false,
463
+ "rstrip": true,
464
  "single_word": false,
465
  "special": false
466
  },
467
  "58": {
468
  "content": "ong",
469
+ "lstrip": true,
470
+ "normalized": false,
471
+ "rstrip": true,
472
  "single_word": false,
473
  "special": false
474
  },
475
  "59": {
476
  "content": "ot",
477
+ "lstrip": true,
478
+ "normalized": false,
479
+ "rstrip": true,
480
  "single_word": false,
481
  "special": false
482
  },
483
  "60": {
484
  "content": "ou",
485
+ "lstrip": true,
486
+ "normalized": false,
487
+ "rstrip": true,
488
  "single_word": false,
489
  "special": false
490
  },
491
  "61": {
492
  "content": "p",
493
+ "lstrip": true,
494
+ "normalized": false,
495
+ "rstrip": true,
496
  "single_word": false,
497
  "special": false
498
  },
499
  "62": {
500
  "content": "s",
501
+ "lstrip": true,
502
+ "normalized": false,
503
+ "rstrip": true,
504
  "single_word": false,
505
  "special": false
506
  },
507
  "63": {
508
  "content": "t",
509
+ "lstrip": true,
510
+ "normalized": false,
511
+ "rstrip": true,
512
  "single_word": false,
513
  "special": false
514
  },
515
  "64": {
516
  "content": "u",
517
+ "lstrip": true,
518
+ "normalized": false,
519
+ "rstrip": true,
520
  "single_word": false,
521
  "special": false
522
  },
523
  "65": {
524
  "content": "ui",
525
+ "lstrip": true,
526
+ "normalized": false,
527
+ "rstrip": true,
528
  "single_word": false,
529
  "special": false
530
  },
531
  "66": {
532
  "content": "uk",
533
+ "lstrip": true,
534
+ "normalized": false,
535
+ "rstrip": true,
536
  "single_word": false,
537
  "special": false
538
  },
539
  "67": {
540
  "content": "un",
541
+ "lstrip": true,
542
+ "normalized": false,
543
+ "rstrip": true,
544
  "single_word": false,
545
  "special": false
546
  },
547
  "68": {
548
  "content": "ung",
549
+ "lstrip": true,
550
+ "normalized": false,
551
+ "rstrip": true,
552
  "single_word": false,
553
  "special": false
554
  },
555
  "69": {
556
  "content": "ut",
557
+ "lstrip": true,
558
+ "normalized": false,
559
+ "rstrip": true,
560
  "single_word": false,
561
  "special": false
562
  },
563
  "70": {
564
  "content": "w",
565
+ "lstrip": true,
566
+ "normalized": false,
567
+ "rstrip": true,
568
  "single_word": false,
569
  "special": false
570
  },
571
  "71": {
572
  "content": "yu",
573
+ "lstrip": true,
574
+ "normalized": false,
575
+ "rstrip": true,
576
  "single_word": false,
577
  "special": false
578
  },
579
  "72": {
580
  "content": "yun",
581
+ "lstrip": true,
582
+ "normalized": false,
583
+ "rstrip": true,
584
  "single_word": false,
585
  "special": false
586
  },
587
  "73": {
588
  "content": "yut",
589
+ "lstrip": true,
590
+ "normalized": false,
591
+ "rstrip": true,
592
  "single_word": false,
593
  "special": false
594
  },
595
  "74": {
596
  "content": "z",
597
+ "lstrip": true,
598
+ "normalized": false,
599
+ "rstrip": true,
600
  "single_word": false,
601
  "special": false
602
  },
 
629
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
630
  "unk_token": "[UNK]",
631
  "word_delimiter_token": "|"
632
+ }
trainer_state.json ADDED
@@ -0,0 +1,374 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.042238425940266514,
3
+ "best_model_checkpoint": "checkpoints/checkpoint-4000",
4
+ "epoch": 10.0,
5
+ "eval_steps": 1000,
6
+ "global_step": 4370,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.2288329519450801,
13
+ "grad_norm": 0.8938984274864197,
14
+ "learning_rate": 1e-05,
15
+ "loss": 0.0975,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.4576659038901602,
20
+ "grad_norm": 1.1161267757415771,
21
+ "learning_rate": 2e-05,
22
+ "loss": 0.0613,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.6864988558352403,
27
+ "grad_norm": 1.264156460762024,
28
+ "learning_rate": 3e-05,
29
+ "loss": 0.0533,
30
+ "step": 300
31
+ },
32
+ {
33
+ "epoch": 0.9153318077803204,
34
+ "grad_norm": 2.014840602874756,
35
+ "learning_rate": 4e-05,
36
+ "loss": 0.0528,
37
+ "step": 400
38
+ },
39
+ {
40
+ "epoch": 1.1441647597254005,
41
+ "grad_norm": 0.28405696153640747,
42
+ "learning_rate": 5e-05,
43
+ "loss": 0.0481,
44
+ "step": 500
45
+ },
46
+ {
47
+ "epoch": 1.3729977116704806,
48
+ "grad_norm": 0.4474036395549774,
49
+ "learning_rate": 6e-05,
50
+ "loss": 0.0549,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 1.6018306636155606,
55
+ "grad_norm": 0.3621448278427124,
56
+ "learning_rate": 7e-05,
57
+ "loss": 0.0592,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 1.8306636155606406,
62
+ "grad_norm": 1.5040546655654907,
63
+ "learning_rate": 8e-05,
64
+ "loss": 0.0596,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 2.059496567505721,
69
+ "grad_norm": 0.5771723389625549,
70
+ "learning_rate": 9e-05,
71
+ "loss": 0.0625,
72
+ "step": 900
73
+ },
74
+ {
75
+ "epoch": 2.288329519450801,
76
+ "grad_norm": 0.7228975296020508,
77
+ "learning_rate": 0.0001,
78
+ "loss": 0.0507,
79
+ "step": 1000
80
+ },
81
+ {
82
+ "epoch": 2.288329519450801,
83
+ "eval_loss": 0.05043927580118179,
84
+ "eval_per": 0.05231171437952632,
85
+ "eval_runtime": 73.3314,
86
+ "eval_samples_per_second": 84.752,
87
+ "eval_steps_per_second": 1.336,
88
+ "eval_ter": 0.09057921991660414,
89
+ "step": 1000
90
+ },
91
+ {
92
+ "epoch": 2.517162471395881,
93
+ "grad_norm": 0.6161186695098877,
94
+ "learning_rate": 9.70326409495549e-05,
95
+ "loss": 0.0581,
96
+ "step": 1100
97
+ },
98
+ {
99
+ "epoch": 2.745995423340961,
100
+ "grad_norm": 0.4418635964393616,
101
+ "learning_rate": 9.40652818991098e-05,
102
+ "loss": 0.0578,
103
+ "step": 1200
104
+ },
105
+ {
106
+ "epoch": 2.974828375286041,
107
+ "grad_norm": 1.2202107906341553,
108
+ "learning_rate": 9.10979228486647e-05,
109
+ "loss": 0.0582,
110
+ "step": 1300
111
+ },
112
+ {
113
+ "epoch": 3.203661327231121,
114
+ "grad_norm": 0.6182931065559387,
115
+ "learning_rate": 8.813056379821959e-05,
116
+ "loss": 0.0444,
117
+ "step": 1400
118
+ },
119
+ {
120
+ "epoch": 3.4324942791762014,
121
+ "grad_norm": 0.36747854948043823,
122
+ "learning_rate": 8.516320474777448e-05,
123
+ "loss": 0.0431,
124
+ "step": 1500
125
+ },
126
+ {
127
+ "epoch": 3.6613272311212812,
128
+ "grad_norm": 0.5178420543670654,
129
+ "learning_rate": 8.219584569732938e-05,
130
+ "loss": 0.0379,
131
+ "step": 1600
132
+ },
133
+ {
134
+ "epoch": 3.8901601830663615,
135
+ "grad_norm": 0.4050116539001465,
136
+ "learning_rate": 7.922848664688428e-05,
137
+ "loss": 0.0373,
138
+ "step": 1700
139
+ },
140
+ {
141
+ "epoch": 4.118993135011442,
142
+ "grad_norm": 0.2309502214193344,
143
+ "learning_rate": 7.626112759643917e-05,
144
+ "loss": 0.0336,
145
+ "step": 1800
146
+ },
147
+ {
148
+ "epoch": 4.3478260869565215,
149
+ "grad_norm": 0.7122157216072083,
150
+ "learning_rate": 7.329376854599406e-05,
151
+ "loss": 0.0321,
152
+ "step": 1900
153
+ },
154
+ {
155
+ "epoch": 4.576659038901602,
156
+ "grad_norm": 0.20260649919509888,
157
+ "learning_rate": 7.032640949554896e-05,
158
+ "loss": 0.0327,
159
+ "step": 2000
160
+ },
161
+ {
162
+ "epoch": 4.576659038901602,
163
+ "eval_loss": 0.05170031264424324,
164
+ "eval_per": 0.044530625646617146,
165
+ "eval_runtime": 73.8457,
166
+ "eval_samples_per_second": 84.162,
167
+ "eval_steps_per_second": 1.327,
168
+ "eval_ter": 0.09002570757327888,
169
+ "step": 2000
170
+ },
171
+ {
172
+ "epoch": 4.805491990846682,
173
+ "grad_norm": 0.1853743940591812,
174
+ "learning_rate": 6.735905044510387e-05,
175
+ "loss": 0.0306,
176
+ "step": 2100
177
+ },
178
+ {
179
+ "epoch": 5.034324942791762,
180
+ "grad_norm": 0.26290130615234375,
181
+ "learning_rate": 6.439169139465876e-05,
182
+ "loss": 0.029,
183
+ "step": 2200
184
+ },
185
+ {
186
+ "epoch": 5.2631578947368425,
187
+ "grad_norm": 0.24352087080478668,
188
+ "learning_rate": 6.142433234421366e-05,
189
+ "loss": 0.0238,
190
+ "step": 2300
191
+ },
192
+ {
193
+ "epoch": 5.491990846681922,
194
+ "grad_norm": 0.506681501865387,
195
+ "learning_rate": 5.845697329376855e-05,
196
+ "loss": 0.023,
197
+ "step": 2400
198
+ },
199
+ {
200
+ "epoch": 5.720823798627002,
201
+ "grad_norm": 0.2733093202114105,
202
+ "learning_rate": 5.548961424332344e-05,
203
+ "loss": 0.025,
204
+ "step": 2500
205
+ },
206
+ {
207
+ "epoch": 5.949656750572083,
208
+ "grad_norm": 0.38764065504074097,
209
+ "learning_rate": 5.252225519287835e-05,
210
+ "loss": 0.0257,
211
+ "step": 2600
212
+ },
213
+ {
214
+ "epoch": 6.178489702517163,
215
+ "grad_norm": 0.9044945240020752,
216
+ "learning_rate": 4.9554896142433236e-05,
217
+ "loss": 0.0206,
218
+ "step": 2700
219
+ },
220
+ {
221
+ "epoch": 6.407322654462242,
222
+ "grad_norm": 0.2747916579246521,
223
+ "learning_rate": 4.658753709198813e-05,
224
+ "loss": 0.0175,
225
+ "step": 2800
226
+ },
227
+ {
228
+ "epoch": 6.636155606407323,
229
+ "grad_norm": 0.22902531921863556,
230
+ "learning_rate": 4.362017804154303e-05,
231
+ "loss": 0.0164,
232
+ "step": 2900
233
+ },
234
+ {
235
+ "epoch": 6.864988558352403,
236
+ "grad_norm": 0.28316548466682434,
237
+ "learning_rate": 4.0652818991097924e-05,
238
+ "loss": 0.0167,
239
+ "step": 3000
240
+ },
241
+ {
242
+ "epoch": 6.864988558352403,
243
+ "eval_loss": 0.044352661818265915,
244
+ "eval_per": 0.04722550908516451,
245
+ "eval_runtime": 73.6387,
246
+ "eval_samples_per_second": 84.399,
247
+ "eval_steps_per_second": 1.331,
248
+ "eval_ter": 0.08786085929716232,
249
+ "step": 3000
250
+ },
251
+ {
252
+ "epoch": 7.093821510297483,
253
+ "grad_norm": 0.220920130610466,
254
+ "learning_rate": 3.768545994065282e-05,
255
+ "loss": 0.0149,
256
+ "step": 3100
257
+ },
258
+ {
259
+ "epoch": 7.322654462242563,
260
+ "grad_norm": 2.0292646884918213,
261
+ "learning_rate": 3.4718100890207716e-05,
262
+ "loss": 0.0137,
263
+ "step": 3200
264
+ },
265
+ {
266
+ "epoch": 7.551487414187643,
267
+ "grad_norm": 0.3689746558666229,
268
+ "learning_rate": 3.175074183976261e-05,
269
+ "loss": 0.0131,
270
+ "step": 3300
271
+ },
272
+ {
273
+ "epoch": 7.780320366132723,
274
+ "grad_norm": 0.17288458347320557,
275
+ "learning_rate": 2.878338278931751e-05,
276
+ "loss": 0.0129,
277
+ "step": 3400
278
+ },
279
+ {
280
+ "epoch": 8.009153318077804,
281
+ "grad_norm": 0.13609908521175385,
282
+ "learning_rate": 2.58160237388724e-05,
283
+ "loss": 0.0117,
284
+ "step": 3500
285
+ },
286
+ {
287
+ "epoch": 8.237986270022883,
288
+ "grad_norm": 0.14868062734603882,
289
+ "learning_rate": 2.28486646884273e-05,
290
+ "loss": 0.0099,
291
+ "step": 3600
292
+ },
293
+ {
294
+ "epoch": 8.466819221967963,
295
+ "grad_norm": 0.4655757248401642,
296
+ "learning_rate": 1.9881305637982196e-05,
297
+ "loss": 0.0089,
298
+ "step": 3700
299
+ },
300
+ {
301
+ "epoch": 8.695652173913043,
302
+ "grad_norm": 0.41903799772262573,
303
+ "learning_rate": 1.6913946587537096e-05,
304
+ "loss": 0.009,
305
+ "step": 3800
306
+ },
307
+ {
308
+ "epoch": 8.924485125858123,
309
+ "grad_norm": 0.17405986785888672,
310
+ "learning_rate": 1.394658753709199e-05,
311
+ "loss": 0.0085,
312
+ "step": 3900
313
+ },
314
+ {
315
+ "epoch": 9.153318077803204,
316
+ "grad_norm": 0.16548456251621246,
317
+ "learning_rate": 1.0979228486646884e-05,
318
+ "loss": 0.0076,
319
+ "step": 4000
320
+ },
321
+ {
322
+ "epoch": 9.153318077803204,
323
+ "eval_loss": 0.04819780960679054,
324
+ "eval_per": 0.042238425940266514,
325
+ "eval_runtime": 74.5094,
326
+ "eval_samples_per_second": 83.412,
327
+ "eval_steps_per_second": 1.315,
328
+ "eval_ter": 0.08691373817636133,
329
+ "step": 4000
330
+ },
331
+ {
332
+ "epoch": 9.382151029748284,
333
+ "grad_norm": 0.16761469841003418,
334
+ "learning_rate": 8.011869436201782e-06,
335
+ "loss": 0.0068,
336
+ "step": 4100
337
+ },
338
+ {
339
+ "epoch": 9.610983981693364,
340
+ "grad_norm": 0.23434874415397644,
341
+ "learning_rate": 5.044510385756677e-06,
342
+ "loss": 0.0062,
343
+ "step": 4200
344
+ },
345
+ {
346
+ "epoch": 9.839816933638444,
347
+ "grad_norm": 0.1882612109184265,
348
+ "learning_rate": 2.0771513353115726e-06,
349
+ "loss": 0.0065,
350
+ "step": 4300
351
+ }
352
+ ],
353
+ "logging_steps": 100,
354
+ "max_steps": 4370,
355
+ "num_input_tokens_seen": 0,
356
+ "num_train_epochs": 10,
357
+ "save_steps": 1000,
358
+ "stateful_callbacks": {
359
+ "TrainerControl": {
360
+ "args": {
361
+ "should_epoch_stop": false,
362
+ "should_evaluate": false,
363
+ "should_log": false,
364
+ "should_save": true,
365
+ "should_training_stop": true
366
+ },
367
+ "attributes": {}
368
+ }
369
+ },
370
+ "total_flos": 6.034289580760073e+19,
371
+ "train_batch_size": 128,
372
+ "trial_name": null,
373
+ "trial_params": null
374
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d9caa9001c04b6d8641bb9481f784d114c6a84d8ccc3e48c93f2a2b2f021edb
3
+ size 5304