Automatic Speech Recognition
ESPnet
English
audio
“siddhu001” committed on
Commit
fd66565
·
1 Parent(s): 46e1bd0

Update model

Browse files
Files changed (21) hide show
  1. README.md +823 -0
  2. data/en_token_list/bpe_unigram500/bpe.model +3 -0
  3. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/RESULTS.md +1 -0
  4. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/config.yaml +748 -0
  5. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/acc.png +0 -0
  6. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/backward_time.png +0 -0
  7. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/cer.png +0 -0
  8. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/clip.png +0 -0
  9. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/forward_time.png +0 -0
  10. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/gpu_max_cached_mem_GB.png +0 -0
  11. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/grad_norm.png +0 -0
  12. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/iter_time.png +0 -0
  13. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/loss.png +0 -0
  14. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/loss_att.png +0 -0
  15. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/loss_scale.png +0 -0
  16. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/optim0_lr0.png +0 -0
  17. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/optim_step_time.png +0 -0
  18. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/train_time.png +0 -0
  19. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/wer.png +0 -0
  20. exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/valid.acc.ave_10best.pth +3 -0
  21. meta.yaml +8 -0
README.md ADDED
@@ -0,0 +1,823 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ language: en
7
+ datasets:
8
+ - slue-ted
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 ASR model
13
+
14
+ ### `espnet/slueted_owsm_summ`
15
+
16
+ This model was trained by “siddhu001” using the slue-ted recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout e23ef85f0b3116ad5c60d0833f186da0deec0734
26
+ pip install -e .
27
+ cd egs2/slue-ted/slu1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/slueted_owsm_summ
29
+ ```
30
+
31
+ RESULT 23.72001537034864 5.0727273644425015 16.49858244502415 22.432552212134674 83.55819195438502
32
+
33
+ ## ASR config
34
+
35
+ <details><summary>expand</summary>
36
+
37
+ ```
38
+ config: conf//train_asr_owsm_weighted_dropout.yaml
39
+ print_config: false
40
+ log_level: INFO
41
+ drop_last_iter: false
42
+ dry_run: false
43
+ iterator_type: sequence
44
+ valid_iterator_type: null
45
+ output_dir: exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp
46
+ ngpu: 1
47
+ seed: 2022
48
+ num_workers: 2
49
+ num_att_plot: 3
50
+ dist_backend: nccl
51
+ dist_init_method: env://
52
+ dist_world_size: 4
53
+ dist_rank: 0
54
+ local_rank: 0
55
+ dist_master_addr: localhost
56
+ dist_master_port: 55179
57
+ dist_launcher: null
58
+ multiprocessing_distributed: true
59
+ unused_parameters: false
60
+ sharded_ddp: false
61
+ cudnn_enabled: true
62
+ cudnn_benchmark: false
63
+ cudnn_deterministic: true
64
+ collect_stats: false
65
+ write_collected_feats: false
66
+ max_epoch: 25
67
+ patience: null
68
+ val_scheduler_criterion:
69
+ - valid
70
+ - loss
71
+ early_stopping_criterion:
72
+ - valid
73
+ - loss
74
+ - min
75
+ best_model_criterion:
76
+ - - valid
77
+ - acc
78
+ - max
79
+ keep_nbest_models: 10
80
+ nbest_averaging_interval: 0
81
+ grad_clip: 5.0
82
+ grad_clip_type: 2.0
83
+ grad_noise: false
84
+ accum_grad: 1
85
+ no_forward_run: false
86
+ resume: true
87
+ train_dtype: float32
88
+ use_amp: false
89
+ log_interval: 100
90
+ use_matplotlib: true
91
+ use_tensorboard: true
92
+ create_graph_in_tensorboard: false
93
+ use_wandb: false
94
+ wandb_project: null
95
+ wandb_id: null
96
+ wandb_entity: null
97
+ wandb_name: null
98
+ wandb_model_log_interval: -1
99
+ detect_anomaly: false
100
+ use_lora: false
101
+ save_lora_only: true
102
+ lora_conf: {}
103
+ pretrain_path: null
104
+ init_param:
105
+ - /scratch/bbjs/arora1/espnet_slue_PR/espnet/egs2/tedlium3/asr1/exp/asr_train_asr_owsm_weighted_dropout_raw_en_bpe500/valid.acc.ave_10best.pth:::ctc
106
+ ignore_init_mismatch: false
107
+ freeze_param:
108
+ - encoder
109
+ num_iters_per_epoch: null
110
+ batch_size: 20
111
+ valid_batch_size: null
112
+ batch_bins: 12000000
113
+ valid_batch_bins: null
114
+ train_shape_file:
115
+ - exp/slu_stats_raw_en_bpe500_sp/train/speech_shape
116
+ - exp/slu_stats_raw_en_bpe500_sp/train/text_shape.bpe
117
+ valid_shape_file:
118
+ - exp/slu_stats_raw_en_bpe500_sp/valid/speech_shape
119
+ - exp/slu_stats_raw_en_bpe500_sp/valid/text_shape.bpe
120
+ batch_type: numel
121
+ valid_batch_type: null
122
+ fold_length:
123
+ - 80000
124
+ - 150
125
+ sort_in_batch: descending
126
+ shuffle_within_batch: false
127
+ sort_batch: descending
128
+ multiple_iterator: false
129
+ chunk_length: 500
130
+ chunk_shift_ratio: 0.5
131
+ num_cache_chunks: 1024
132
+ chunk_excluded_key_prefixes: []
133
+ chunk_default_fs: null
134
+ train_data_path_and_name_and_type:
135
+ - - dump/raw/train_sp/wav.scp
136
+ - speech
137
+ - kaldi_ark
138
+ - - dump/raw/train_sp/text
139
+ - text
140
+ - text
141
+ valid_data_path_and_name_and_type:
142
+ - - dump/raw/devel/wav.scp
143
+ - speech
144
+ - kaldi_ark
145
+ - - dump/raw/devel/text
146
+ - text
147
+ - text
148
+ allow_variable_data_keys: false
149
+ max_cache_size: 0.0
150
+ max_cache_fd: 32
151
+ allow_multi_rates: false
152
+ valid_max_cache_size: null
153
+ exclude_weight_decay: false
154
+ exclude_weight_decay_conf: {}
155
+ optim: adam
156
+ optim_conf:
157
+ lr: 0.002
158
+ weight_decay: 1.0e-06
159
+ scheduler: warmuplr
160
+ scheduler_conf:
161
+ warmup_steps: 5000
162
+ token_list:
163
+ - <blank>
164
+ - <unk>
165
+ - '[sep]'
166
+ - '&quot;'
167
+ - s
168
+ - ▁
169
+ - ▁the
170
+ - ','
171
+ - t
172
+ - d
173
+ - ▁a
174
+ - .
175
+ - ing
176
+ - o
177
+ - e
178
+ - ▁to
179
+ - a
180
+ - ▁and
181
+ - y
182
+ - n
183
+ - ▁of
184
+ - r
185
+ - ▁in
186
+ - u
187
+ - i
188
+ - m
189
+ - p
190
+ - c
191
+ - er
192
+ - g
193
+ - l
194
+ - al
195
+ - re
196
+ - ed
197
+ - b
198
+ - ''''
199
+ - ar
200
+ - k
201
+ - in
202
+ - f
203
+ - ▁"
204
+ - le
205
+ - 'on'
206
+ - v
207
+ - or
208
+ - th
209
+ - '-'
210
+ - ▁c
211
+ - en
212
+ - ▁f
213
+ - ▁--
214
+ - ▁we
215
+ - ▁for
216
+ - ▁how
217
+ - ly
218
+ - ▁re
219
+ - se
220
+ - ▁that
221
+ - es
222
+ - w
223
+ - ic
224
+ - st
225
+ - ▁w
226
+ - ▁be
227
+ - ri
228
+ - an
229
+ - ra
230
+ - ve
231
+ - ce
232
+ - ur
233
+ - ▁by
234
+ - ▁it
235
+ - li
236
+ - ▁de
237
+ - '?'
238
+ - it
239
+ - ch
240
+ - ent
241
+ - ▁is
242
+ - ter
243
+ - el
244
+ - ▁on
245
+ - ▁e
246
+ - ▁he
247
+ - ▁co
248
+ - ▁an
249
+ - ▁ma
250
+ - ▁st
251
+ - ll
252
+ - ▁with
253
+ - ▁can
254
+ - il
255
+ - ▁you
256
+ - ▁us
257
+ - ation
258
+ - te
259
+ - ▁this
260
+ - ▁b
261
+ - ▁do
262
+ - ▁g
263
+ - me
264
+ - ▁what
265
+ - ck
266
+ - ▁from
267
+ - ate
268
+ - ▁p
269
+ - z
270
+ - la
271
+ - ▁mo
272
+ - ▁di
273
+ - ive
274
+ - mp
275
+ - ▁talk
276
+ - ity
277
+ - vi
278
+ - ta
279
+ - at
280
+ - ge
281
+ - ▁tr
282
+ - ▁she
283
+ - ▁our
284
+ - ▁pa
285
+ - ci
286
+ - et
287
+ - h
288
+ - ▁su
289
+ - ver
290
+ - ▁world
291
+ - pe
292
+ - ▁about
293
+ - ▁me
294
+ - ▁so
295
+ - and
296
+ - ▁con
297
+ - tion
298
+ - de
299
+ - ir
300
+ - ▁her
301
+ - im
302
+ - ':'
303
+ - ▁his
304
+ - ies
305
+ - ▁po
306
+ - ▁are
307
+ - ect
308
+ - lo
309
+ - ▁your
310
+ - un
311
+ - ist
312
+ - hi
313
+ - ▁mi
314
+ - x
315
+ - id
316
+ - ment
317
+ - ol
318
+ - ul
319
+ - ti
320
+ - ne
321
+ - qu
322
+ - ▁but
323
+ - ▁ca
324
+ - ▁fa
325
+ - ▁as
326
+ - ▁un
327
+ - ers
328
+ - ight
329
+ - ▁says
330
+ - '0'
331
+ - ng
332
+ - op
333
+ - '1'
334
+ - ▁k
335
+ - ad
336
+ - j
337
+ - ma
338
+ - ▁pro
339
+ - ▁work
340
+ - ▁ba
341
+ - ▁share
342
+ - ▁new
343
+ - ▁more
344
+ - ▁vi
345
+ - ▁sa
346
+ - ▁at
347
+ - ▁la
348
+ - ut
349
+ - bi
350
+ - sion
351
+ - ▁ho
352
+ - na
353
+ - act
354
+ - age
355
+ - ke
356
+ - if
357
+ - ▁bo
358
+ - ▁br
359
+ - ▁ha
360
+ - ▁no
361
+ - co
362
+ - ▁lo
363
+ - mi
364
+ - ▁make
365
+ - ▁people
366
+ - ▁why
367
+ - ant
368
+ - ▁their
369
+ - ▁i
370
+ - ▁life
371
+ - ▁all
372
+ - ting
373
+ - ▁human
374
+ - ▁have
375
+ - om
376
+ - )
377
+ - ▁(
378
+ - ▁help
379
+ - ▁ted
380
+ - wa
381
+ - sh
382
+ - ▁da
383
+ - ▁le
384
+ - ▁out
385
+ - ph
386
+ - ical
387
+ - ▁way
388
+ - ff
389
+ - ▁ro
390
+ - able
391
+ - ▁some
392
+ - est
393
+ - ure
394
+ - em
395
+ - ho
396
+ - ▁ex
397
+ - gen
398
+ - ha
399
+ - ia
400
+ - ine
401
+ - ▁into
402
+ - ca
403
+ - ▁was
404
+ - ▁who
405
+ - ther
406
+ - ▁they
407
+ - ow
408
+ - he
409
+ - ▁one
410
+ - ▁when
411
+ - form
412
+ - ▁pre
413
+ - ni
414
+ - ▁could
415
+ - ▁like
416
+ - ▁per
417
+ - ▁up
418
+ - ance
419
+ - com
420
+ - ▁go
421
+ - ion
422
+ - tor
423
+ - ▁fe
424
+ - ▁ra
425
+ - ▁or
426
+ - ▁en
427
+ - ▁change
428
+ - tic
429
+ - ▁every
430
+ - ▁jo
431
+ - ence
432
+ - ▁not
433
+ - ▁art
434
+ - one
435
+ - use
436
+ - ous
437
+ - ▁plan
438
+ - ▁music
439
+ - ▁exp
440
+ - und
441
+ - ▁ne
442
+ - um
443
+ - ative
444
+ - pp
445
+ - ▁need
446
+ - tro
447
+ - directed
448
+ - ▁learn
449
+ - ▁narrate
450
+ - ▁has
451
+ - lar
452
+ - '].'
453
+ - man
454
+ - ▁car
455
+ - ▁future
456
+ - ▁real
457
+ - ▁time
458
+ - ize
459
+ - ▁live
460
+ - ber
461
+ - ▁mar
462
+ - ▁ga
463
+ - ▁take
464
+ - ▁dr
465
+ - ful
466
+ - ▁get
467
+ - ▁shows
468
+ - day
469
+ - ▁cha
470
+ - ▁than
471
+ - ▁know
472
+ - ian
473
+ - ▁see
474
+ - ▁just
475
+ - '2'
476
+ - ▁other
477
+ - old
478
+ - ▁design
479
+ - ▁chi
480
+ - ▁build
481
+ - ious
482
+ - ▁most
483
+ - ▁si
484
+ - ▁will
485
+ - ▁power
486
+ - ▁think
487
+ - port
488
+ - ▁over
489
+ - ▁ja
490
+ - ish
491
+ - ▁climate
492
+ - ▁sha
493
+ - ▁through
494
+ - less
495
+ - '3'
496
+ - ▁my
497
+ - ▁where
498
+ - ▁global
499
+ - ▁health
500
+ - ▁pri
501
+ - ▁20
502
+ - ▁story
503
+ - gu
504
+ - ugh
505
+ - ▁create
506
+ - ▁look
507
+ - ▁trans
508
+ - ▁har
509
+ - ▁even
510
+ - ▁part
511
+ - ▁years
512
+ - ▁lead
513
+ - side
514
+ - low
515
+ - long
516
+ - ▁technolog
517
+ - ness
518
+ - '5'
519
+ - ▁call
520
+ - ▁sc
521
+ - ▁system
522
+ - '9'
523
+ - line
524
+ - ▁brain
525
+ - ▁data
526
+ - ▁own
527
+ - ition
528
+ - ▁explains
529
+ - ▁tell
530
+ - ▁explore
531
+ - ▁start
532
+ - ▁ru
533
+ - ▁which
534
+ - ▁anderson
535
+ - ▁find
536
+ - ▁hu
537
+ - ▁women
538
+ - ▁better
539
+ - ▁idea
540
+ - ▁history
541
+ - ▁research
542
+ - ▁science
543
+ - ism
544
+ - ▁first
545
+ - ▁grow
546
+ - ▁right
547
+ - clu
548
+ - ▁space
549
+ - ▁develop
550
+ - ▁problem
551
+ - ▁two
552
+ - ▁earth
553
+ - ologist
554
+ - ▁many
555
+ - ▁should
556
+ - ▁three
557
+ - ▁fellow
558
+ - ▁social
559
+ - ▁africa
560
+ - ▁...
561
+ - '4'
562
+ - ▁addis
563
+ - ▁powerful
564
+ - ▁found
565
+ - ▁under
566
+ - ▁understand
567
+ - ▁after
568
+ - ▁stories
569
+ - ▁around
570
+ - ▁personal
571
+ - ▁project
572
+ - ▁between
573
+ - ▁question
574
+ - ▁play
575
+ - ▁scientist
576
+ - ▁happen
577
+ - ▁good
578
+ - ▁produc
579
+ - ▁experience
580
+ - ▁step
581
+ - ▁america
582
+ - '8'
583
+ - ▁great
584
+ - ▁down
585
+ - ▁high
586
+ - ▁would
587
+ - ▁turn
588
+ - ▁surprising
589
+ - ▁imagin
590
+ - ▁teach
591
+ - cross
592
+ - ▁place
593
+ - ▁medic
594
+ - ▁million
595
+ - ▁things
596
+ - '7'
597
+ - ▁reveal
598
+ - ▁without
599
+ - ▁challenge
600
+ - ▁next
601
+ - ▁each
602
+ - ▁studio
603
+ - organ
604
+ - '6'
605
+ - ▁business
606
+ - ▁much
607
+ - ▁show
608
+ - ▁conversation
609
+ - ▁energy
610
+ - ▁school
611
+ - ▁ocean
612
+ - ▁while
613
+ - source
614
+ - ization
615
+ - ▁break
616
+ - ▁robot
617
+ - ▁disease
618
+ - ▁behind
619
+ - ability
620
+ - ▁team
621
+ - ▁chris
622
+ - ▁become
623
+ - ▁solution
624
+ - ▁protect
625
+ - ▁collect
626
+ - ▁different
627
+ - ▁those
628
+ - ▁connect
629
+ - ▁architect
630
+ - ▁language
631
+ - ▁simple
632
+ - ▁solve
633
+ - ▁before
634
+ - ▁community
635
+ - ▁country
636
+ - ▁secret
637
+ - ▁keep
638
+ - ▁food
639
+ - ▁thought
640
+ - ▁discover
641
+ - ▁environment
642
+ - ▁government
643
+ - ▁public
644
+ - ;
645
+ - '!'
646
+ - /
647
+ - q
648
+ - '%'
649
+ - '@'
650
+ - ']'
651
+ - +
652
+ - '&'
653
+ - '|'
654
+ - _
655
+ - (
656
+ - '"'
657
+ - $
658
+ - '*'
659
+ - '='
660
+ - '['
661
+ - '`'
662
+ - <sos/eos>
663
+ transcript_token_list: null
664
+ two_pass: false
665
+ pre_postencoder_norm: false
666
+ init: null
667
+ input_size: null
668
+ ctc_conf:
669
+ dropout_rate: 0.0
670
+ ctc_type: builtin
671
+ reduce: true
672
+ ignore_nan_grad: null
673
+ zero_infinity: true
674
+ brctc_risk_strategy: exp
675
+ brctc_group_strategy: end
676
+ brctc_risk_factor: 0.0
677
+ joint_net_conf: null
678
+ use_preprocessor: true
679
+ token_type: bpe
680
+ bpemodel: data/en_token_list/bpe_unigram500/bpe.model
681
+ non_linguistic_symbols: null
682
+ cleaner: null
683
+ g2p: null
684
+ speech_volume_normalize: null
685
+ rir_scp: null
686
+ rir_apply_prob: 1.0
687
+ noise_scp: null
688
+ noise_apply_prob: 1.0
689
+ noise_db_range: '13_15'
690
+ short_noise_thres: 0.5
691
+ frontend: default
692
+ frontend_conf:
693
+ n_fft: 512
694
+ win_length: 400
695
+ hop_length: 160
696
+ fs: 16k
697
+ specaug: specaug
698
+ specaug_conf:
699
+ apply_time_warp: false
700
+ time_warp_window: 5
701
+ time_warp_mode: bicubic
702
+ apply_freq_mask: true
703
+ freq_mask_width_range:
704
+ - 0
705
+ - 27
706
+ num_freq_mask: 2
707
+ apply_time_mask: true
708
+ time_mask_width_ratio_range:
709
+ - 0.0
710
+ - 0.05
711
+ num_time_mask: 10
712
+ normalize: global_mvn
713
+ normalize_conf:
714
+ stats_file: /scratch/bbjs/arora1/new_download_espnet_egs2/harpervalley/slu1_superb_onlyda/owsm_v3.1_ebf/exp/s2t_stats_raw_bpe50000/train/feats_stats.npz
715
+ model: espnet
716
+ model_conf:
717
+ ctc_weight: 0.0
718
+ lsm_weight: 0.1
719
+ length_normalized_loss: false
720
+ weighted_sum: true
721
+ extract_feats_in_collect_stats: false
722
+ preencoder: null
723
+ preencoder_conf: {}
724
+ encoder: e_branchformer
725
+ encoder_conf:
726
+ output_size: 1024
727
+ attention_heads: 16
728
+ attention_layer_type: selfattn
729
+ pos_enc_layer_type: abs_pos
730
+ rel_pos_type: latest
731
+ cgmlp_linear_units: 4096
732
+ cgmlp_conv_kernel: 31
733
+ use_linear_after_conv: false
734
+ gate_activation: identity
735
+ num_blocks: 18
736
+ dropout_rate: 0.2
737
+ positional_dropout_rate: 0.2
738
+ attention_dropout_rate: 0.2
739
+ input_layer: conv2d
740
+ layer_drop_rate: 0.0
741
+ linear_units: 4096
742
+ positionwise_layer_type: linear
743
+ use_ffn: true
744
+ macaron_ffn: true
745
+ merge_conv_kernel: 31
746
+ prepostencoder: linear
747
+ prepostencoder_conf:
748
+ input_size: 1024
749
+ output_size: 80
750
+ postencoder: conformer_full
751
+ postencoder_conf:
752
+ output_size: 256
753
+ attention_heads: 4
754
+ linear_units: 1024
755
+ num_blocks: 12
756
+ dropout_rate: 0.1
757
+ positional_dropout_rate: 0.2
758
+ attention_dropout_rate: 0.1
759
+ input_layer: conv2d1
760
+ normalize_before: true
761
+ macaron_style: true
762
+ rel_pos_type: latest
763
+ pos_enc_layer_type: rel_pos
764
+ selfattention_layer_type: rel_selfattn
765
+ activation_type: swish
766
+ use_cnn_module: true
767
+ cnn_module_kernel: 31
768
+ deliberationencoder: null
769
+ deliberationencoder_conf: {}
770
+ decoder: transformer
771
+ decoder_conf:
772
+ attention_heads: 4
773
+ linear_units: 2048
774
+ num_blocks: 6
775
+ dropout_rate: 0.1
776
+ positional_dropout_rate: 0.1
777
+ self_attention_dropout_rate: 0.1
778
+ src_attention_dropout_rate: 0.1
779
+ postdecoder: null
780
+ postdecoder_conf: {}
781
+ required:
782
+ - output_dir
783
+ - token_list
784
+ version: '202310'
785
+ distributed: true
786
+ ```
787
+
788
+ </details>
789
+
790
+
791
+
792
+ ### Citing ESPnet
793
+
794
+ ```bibtex
795
+ @inproceedings{watanabe2018espnet,
796
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
797
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
798
+ year={2018},
799
+ booktitle={Proceedings of Interspeech},
800
+ pages={2207--2211},
801
+ doi={10.21437/Interspeech.2018-1456},
802
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
803
+ }
804
+
805
+
806
+
807
+
808
+
809
+
810
+ ```
811
+
812
+ or arXiv:
813
+
814
+ ```bibtex
815
+ @misc{watanabe2018espnet,
816
+ title={ESPnet: End-to-End Speech Processing Toolkit},
817
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
818
+ year={2018},
819
+ eprint={1804.00015},
820
+ archivePrefix={arXiv},
821
+ primaryClass={cs.CL}
822
+ }
823
+ ```
data/en_token_list/bpe_unigram500/bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9070b5fe2b321a13162f2aa8159c6f637402b46fb032510dbb1dabfcf0afa24f
3
+ size 244766
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/RESULTS.md ADDED
@@ -0,0 +1 @@
 
 
1
+ RESULT 23.72001537034864 5.0727273644425015 16.49858244502415 22.432552212134674 83.55819195438502
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/config.yaml ADDED
@@ -0,0 +1,748 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf//train_asr_owsm_weighted_dropout.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp
9
+ ngpu: 1
10
+ seed: 2022
11
+ num_workers: 2
12
+ num_att_plot: 3
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: 4
16
+ dist_rank: 0
17
+ local_rank: 0
18
+ dist_master_addr: localhost
19
+ dist_master_port: 55179
20
+ dist_launcher: null
21
+ multiprocessing_distributed: true
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ cudnn_enabled: true
25
+ cudnn_benchmark: false
26
+ cudnn_deterministic: true
27
+ collect_stats: false
28
+ write_collected_feats: false
29
+ max_epoch: 25
30
+ patience: null
31
+ val_scheduler_criterion:
32
+ - valid
33
+ - loss
34
+ early_stopping_criterion:
35
+ - valid
36
+ - loss
37
+ - min
38
+ best_model_criterion:
39
+ - - valid
40
+ - acc
41
+ - max
42
+ keep_nbest_models: 10
43
+ nbest_averaging_interval: 0
44
+ grad_clip: 5.0
45
+ grad_clip_type: 2.0
46
+ grad_noise: false
47
+ accum_grad: 1
48
+ no_forward_run: false
49
+ resume: true
50
+ train_dtype: float32
51
+ use_amp: false
52
+ log_interval: 100
53
+ use_matplotlib: true
54
+ use_tensorboard: true
55
+ create_graph_in_tensorboard: false
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ use_lora: false
64
+ save_lora_only: true
65
+ lora_conf: {}
66
+ pretrain_path: null
67
+ init_param:
68
+ - /scratch/bbjs/arora1/espnet_slue_PR/espnet/egs2/tedlium3/asr1/exp/asr_train_asr_owsm_weighted_dropout_raw_en_bpe500/valid.acc.ave_10best.pth:::ctc
69
+ ignore_init_mismatch: false
70
+ freeze_param:
71
+ - encoder
72
+ num_iters_per_epoch: null
73
+ batch_size: 20
74
+ valid_batch_size: null
75
+ batch_bins: 12000000
76
+ valid_batch_bins: null
77
+ train_shape_file:
78
+ - exp/slu_stats_raw_en_bpe500_sp/train/speech_shape
79
+ - exp/slu_stats_raw_en_bpe500_sp/train/text_shape.bpe
80
+ valid_shape_file:
81
+ - exp/slu_stats_raw_en_bpe500_sp/valid/speech_shape
82
+ - exp/slu_stats_raw_en_bpe500_sp/valid/text_shape.bpe
83
+ batch_type: numel
84
+ valid_batch_type: null
85
+ fold_length:
86
+ - 80000
87
+ - 150
88
+ sort_in_batch: descending
89
+ shuffle_within_batch: false
90
+ sort_batch: descending
91
+ multiple_iterator: false
92
+ chunk_length: 500
93
+ chunk_shift_ratio: 0.5
94
+ num_cache_chunks: 1024
95
+ chunk_excluded_key_prefixes: []
96
+ chunk_default_fs: null
97
+ train_data_path_and_name_and_type:
98
+ - - dump/raw/train_sp/wav.scp
99
+ - speech
100
+ - kaldi_ark
101
+ - - dump/raw/train_sp/text
102
+ - text
103
+ - text
104
+ valid_data_path_and_name_and_type:
105
+ - - dump/raw/devel/wav.scp
106
+ - speech
107
+ - kaldi_ark
108
+ - - dump/raw/devel/text
109
+ - text
110
+ - text
111
+ allow_variable_data_keys: false
112
+ max_cache_size: 0.0
113
+ max_cache_fd: 32
114
+ allow_multi_rates: false
115
+ valid_max_cache_size: null
116
+ exclude_weight_decay: false
117
+ exclude_weight_decay_conf: {}
118
+ optim: adam
119
+ optim_conf:
120
+ lr: 0.002
121
+ weight_decay: 1.0e-06
122
+ scheduler: warmuplr
123
+ scheduler_conf:
124
+ warmup_steps: 5000
125
+ token_list:
126
+ - <blank>
127
+ - <unk>
128
+ - '[sep]'
129
+ - '&quot;'
130
+ - s
131
+ - ▁
132
+ - ▁the
133
+ - ','
134
+ - t
135
+ - d
136
+ - ▁a
137
+ - .
138
+ - ing
139
+ - o
140
+ - e
141
+ - ▁to
142
+ - a
143
+ - ▁and
144
+ - y
145
+ - n
146
+ - ▁of
147
+ - r
148
+ - ▁in
149
+ - u
150
+ - i
151
+ - m
152
+ - p
153
+ - c
154
+ - er
155
+ - g
156
+ - l
157
+ - al
158
+ - re
159
+ - ed
160
+ - b
161
+ - ''''
162
+ - ar
163
+ - k
164
+ - in
165
+ - f
166
+ - ▁"
167
+ - le
168
+ - 'on'
169
+ - v
170
+ - or
171
+ - th
172
+ - '-'
173
+ - ▁c
174
+ - en
175
+ - ▁f
176
+ - ▁--
177
+ - ▁we
178
+ - ▁for
179
+ - ▁how
180
+ - ly
181
+ - ▁re
182
+ - se
183
+ - ▁that
184
+ - es
185
+ - w
186
+ - ic
187
+ - st
188
+ - ▁w
189
+ - ▁be
190
+ - ri
191
+ - an
192
+ - ra
193
+ - ve
194
+ - ce
195
+ - ur
196
+ - ▁by
197
+ - ▁it
198
+ - li
199
+ - ▁de
200
+ - '?'
201
+ - it
202
+ - ch
203
+ - ent
204
+ - ▁is
205
+ - ter
206
+ - el
207
+ - ▁on
208
+ - ▁e
209
+ - ▁he
210
+ - ▁co
211
+ - ▁an
212
+ - ▁ma
213
+ - ▁st
214
+ - ll
215
+ - ▁with
216
+ - ▁can
217
+ - il
218
+ - ▁you
219
+ - ▁us
220
+ - ation
221
+ - te
222
+ - ▁this
223
+ - ▁b
224
+ - ▁do
225
+ - ▁g
226
+ - me
227
+ - ▁what
228
+ - ck
229
+ - ▁from
230
+ - ate
231
+ - ▁p
232
+ - z
233
+ - la
234
+ - ▁mo
235
+ - ▁di
236
+ - ive
237
+ - mp
238
+ - ▁talk
239
+ - ity
240
+ - vi
241
+ - ta
242
+ - at
243
+ - ge
244
+ - ▁tr
245
+ - ▁she
246
+ - ▁our
247
+ - ▁pa
248
+ - ci
249
+ - et
250
+ - h
251
+ - ▁su
252
+ - ver
253
+ - ▁world
254
+ - pe
255
+ - ▁about
256
+ - ▁me
257
+ - ▁so
258
+ - and
259
+ - ▁con
260
+ - tion
261
+ - de
262
+ - ir
263
+ - ▁her
264
+ - im
265
+ - ':'
266
+ - ▁his
267
+ - ies
268
+ - ▁po
269
+ - ▁are
270
+ - ect
271
+ - lo
272
+ - ▁your
273
+ - un
274
+ - ist
275
+ - hi
276
+ - ▁mi
277
+ - x
278
+ - id
279
+ - ment
280
+ - ol
281
+ - ul
282
+ - ti
283
+ - ne
284
+ - qu
285
+ - ▁but
286
+ - ▁ca
287
+ - ▁fa
288
+ - ▁as
289
+ - ▁un
290
+ - ers
291
+ - ight
292
+ - ▁says
293
+ - '0'
294
+ - ng
295
+ - op
296
+ - '1'
297
+ - ▁k
298
+ - ad
299
+ - j
300
+ - ma
301
+ - ▁pro
302
+ - ▁work
303
+ - ▁ba
304
+ - ▁share
305
+ - ▁new
306
+ - ▁more
307
+ - ▁vi
308
+ - ▁sa
309
+ - ▁at
310
+ - ▁la
311
+ - ut
312
+ - bi
313
+ - sion
314
+ - ▁ho
315
+ - na
316
+ - act
317
+ - age
318
+ - ke
319
+ - if
320
+ - ▁bo
321
+ - ▁br
322
+ - ▁ha
323
+ - ▁no
324
+ - co
325
+ - ▁lo
326
+ - mi
327
+ - ▁make
328
+ - ▁people
329
+ - ▁why
330
+ - ant
331
+ - ▁their
332
+ - ▁i
333
+ - ▁life
334
+ - ▁all
335
+ - ting
336
+ - ▁human
337
+ - ▁have
338
+ - om
339
+ - )
340
+ - ▁(
341
+ - ▁help
342
+ - ▁ted
343
+ - wa
344
+ - sh
345
+ - ▁da
346
+ - ▁le
347
+ - ▁out
348
+ - ph
349
+ - ical
350
+ - ▁way
351
+ - ff
352
+ - ▁ro
353
+ - able
354
+ - ▁some
355
+ - est
356
+ - ure
357
+ - em
358
+ - ho
359
+ - ▁ex
360
+ - gen
361
+ - ha
362
+ - ia
363
+ - ine
364
+ - ▁into
365
+ - ca
366
+ - ▁was
367
+ - ▁who
368
+ - ther
369
+ - ▁they
370
+ - ow
371
+ - he
372
+ - ▁one
373
+ - ▁when
374
+ - form
375
+ - ▁pre
376
+ - ni
377
+ - ▁could
378
+ - ▁like
379
+ - ▁per
380
+ - ▁up
381
+ - ance
382
+ - com
383
+ - ▁go
384
+ - ion
385
+ - tor
386
+ - ▁fe
387
+ - ▁ra
388
+ - ▁or
389
+ - ▁en
390
+ - ▁change
391
+ - tic
392
+ - ▁every
393
+ - ▁jo
394
+ - ence
395
+ - ▁not
396
+ - ▁art
397
+ - one
398
+ - use
399
+ - ous
400
+ - ▁plan
401
+ - ▁music
402
+ - ▁exp
403
+ - und
404
+ - ▁ne
405
+ - um
406
+ - ative
407
+ - pp
408
+ - ▁need
409
+ - tro
410
+ - directed
411
+ - ▁learn
412
+ - ▁narrate
413
+ - ▁has
414
+ - lar
415
+ - '].'
416
+ - man
417
+ - ▁car
418
+ - ▁future
419
+ - ▁real
420
+ - ▁time
421
+ - ize
422
+ - ▁live
423
+ - ber
424
+ - ▁mar
425
+ - ▁ga
426
+ - ▁take
427
+ - ▁dr
428
+ - ful
429
+ - ▁get
430
+ - ▁shows
431
+ - day
432
+ - ▁cha
433
+ - ▁than
434
+ - ▁know
435
+ - ian
436
+ - ▁see
437
+ - ▁just
438
+ - '2'
439
+ - ▁other
440
+ - old
441
+ - ▁design
442
+ - ▁chi
443
+ - ▁build
444
+ - ious
445
+ - ▁most
446
+ - ▁si
447
+ - ▁will
448
+ - ▁power
449
+ - ▁think
450
+ - port
451
+ - ▁over
452
+ - ▁ja
453
+ - ish
454
+ - ▁climate
455
+ - ▁sha
456
+ - ▁through
457
+ - less
458
+ - '3'
459
+ - ▁my
460
+ - ▁where
461
+ - ▁global
462
+ - ▁health
463
+ - ▁pri
464
+ - ▁20
465
+ - ▁story
466
+ - gu
467
+ - ugh
468
+ - ▁create
469
+ - ▁look
470
+ - ▁trans
471
+ - ▁har
472
+ - ▁even
473
+ - ▁part
474
+ - ▁years
475
+ - ▁lead
476
+ - side
477
+ - low
478
+ - long
479
+ - ▁technolog
480
+ - ness
481
+ - '5'
482
+ - ▁call
483
+ - ▁sc
484
+ - ▁system
485
+ - '9'
486
+ - line
487
+ - ▁brain
488
+ - ▁data
489
+ - ▁own
490
+ - ition
491
+ - ▁explains
492
+ - ▁tell
493
+ - ▁explore
494
+ - ▁start
495
+ - ▁ru
496
+ - ▁which
497
+ - ▁anderson
498
+ - ▁find
499
+ - ▁hu
500
+ - ▁women
501
+ - ▁better
502
+ - ▁idea
503
+ - ▁history
504
+ - ▁research
505
+ - ▁science
506
+ - ism
507
+ - ▁first
508
+ - ▁grow
509
+ - ▁right
510
+ - clu
511
+ - ▁space
512
+ - ▁develop
513
+ - ▁problem
514
+ - ▁two
515
+ - ▁earth
516
+ - ologist
517
+ - ▁many
518
+ - ▁should
519
+ - ▁three
520
+ - ▁fellow
521
+ - ▁social
522
+ - ▁africa
523
+ - ▁...
524
+ - '4'
525
+ - ▁addis
526
+ - ▁powerful
527
+ - ▁found
528
+ - ▁under
529
+ - ▁understand
530
+ - ▁after
531
+ - ▁stories
532
+ - ▁around
533
+ - ▁personal
534
+ - ▁project
535
+ - ▁between
536
+ - ▁question
537
+ - ▁play
538
+ - ▁scientist
539
+ - ▁happen
540
+ - ▁good
541
+ - ▁produc
542
+ - ▁experience
543
+ - ▁step
544
+ - ▁america
545
+ - '8'
546
+ - ▁great
547
+ - ▁down
548
+ - ▁high
549
+ - ▁would
550
+ - ▁turn
551
+ - ▁surprising
552
+ - ▁imagin
553
+ - ▁teach
554
+ - cross
555
+ - ▁place
556
+ - ▁medic
557
+ - ▁million
558
+ - ▁things
559
+ - '7'
560
+ - ▁reveal
561
+ - ▁without
562
+ - ▁challenge
563
+ - ▁next
564
+ - ▁each
565
+ - ▁studio
566
+ - organ
567
+ - '6'
568
+ - ▁business
569
+ - ▁much
570
+ - ▁show
571
+ - ▁conversation
572
+ - ▁energy
573
+ - ▁school
574
+ - ▁ocean
575
+ - ▁while
576
+ - source
577
+ - ization
578
+ - ▁break
579
+ - ▁robot
580
+ - ▁disease
581
+ - ▁behind
582
+ - ability
583
+ - ▁team
584
+ - ▁chris
585
+ - ▁become
586
+ - ▁solution
587
+ - ▁protect
588
+ - ▁collect
589
+ - ▁different
590
+ - ▁those
591
+ - ▁connect
592
+ - ▁architect
593
+ - ▁language
594
+ - ▁simple
595
+ - ▁solve
596
+ - ▁before
597
+ - ▁community
598
+ - ▁country
599
+ - ▁secret
600
+ - ▁keep
601
+ - ▁food
602
+ - ▁thought
603
+ - ▁discover
604
+ - ▁environment
605
+ - ▁government
606
+ - ▁public
607
+ - ;
608
+ - '!'
609
+ - /
610
+ - q
611
+ - '%'
612
+ - '@'
613
+ - ']'
614
+ - +
615
+ - '&'
616
+ - '|'
617
+ - _
618
+ - (
619
+ - '"'
620
+ - $
621
+ - '*'
622
+ - '='
623
+ - '['
624
+ - '`'
625
+ - <sos/eos>
626
+ transcript_token_list: null
627
+ two_pass: false
628
+ pre_postencoder_norm: false
629
+ init: null
630
+ input_size: null
631
+ ctc_conf:
632
+ dropout_rate: 0.0
633
+ ctc_type: builtin
634
+ reduce: true
635
+ ignore_nan_grad: null
636
+ zero_infinity: true
637
+ brctc_risk_strategy: exp
638
+ brctc_group_strategy: end
639
+ brctc_risk_factor: 0.0
640
+ joint_net_conf: null
641
+ use_preprocessor: true
642
+ token_type: bpe
643
+ bpemodel: data/en_token_list/bpe_unigram500/bpe.model
644
+ non_linguistic_symbols: null
645
+ cleaner: null
646
+ g2p: null
647
+ speech_volume_normalize: null
648
+ rir_scp: null
649
+ rir_apply_prob: 1.0
650
+ noise_scp: null
651
+ noise_apply_prob: 1.0
652
+ noise_db_range: '13_15'
653
+ short_noise_thres: 0.5
654
+ frontend: default
655
+ frontend_conf:
656
+ n_fft: 512
657
+ win_length: 400
658
+ hop_length: 160
659
+ fs: 16k
660
+ specaug: specaug
661
+ specaug_conf:
662
+ apply_time_warp: false
663
+ time_warp_window: 5
664
+ time_warp_mode: bicubic
665
+ apply_freq_mask: true
666
+ freq_mask_width_range:
667
+ - 0
668
+ - 27
669
+ num_freq_mask: 2
670
+ apply_time_mask: true
671
+ time_mask_width_ratio_range:
672
+ - 0.0
673
+ - 0.05
674
+ num_time_mask: 10
675
+ normalize: global_mvn
676
+ normalize_conf:
677
+ stats_file: /scratch/bbjs/arora1/new_download_espnet_egs2/harpervalley/slu1_superb_onlyda/owsm_v3.1_ebf/exp/s2t_stats_raw_bpe50000/train/feats_stats.npz
678
+ model: espnet
679
+ model_conf:
680
+ ctc_weight: 0.0
681
+ lsm_weight: 0.1
682
+ length_normalized_loss: false
683
+ weighted_sum: true
684
+ extract_feats_in_collect_stats: false
685
+ preencoder: null
686
+ preencoder_conf: {}
687
+ encoder: e_branchformer
688
+ encoder_conf:
689
+ output_size: 1024
690
+ attention_heads: 16
691
+ attention_layer_type: selfattn
692
+ pos_enc_layer_type: abs_pos
693
+ rel_pos_type: latest
694
+ cgmlp_linear_units: 4096
695
+ cgmlp_conv_kernel: 31
696
+ use_linear_after_conv: false
697
+ gate_activation: identity
698
+ num_blocks: 18
699
+ dropout_rate: 0.2
700
+ positional_dropout_rate: 0.2
701
+ attention_dropout_rate: 0.2
702
+ input_layer: conv2d
703
+ layer_drop_rate: 0.0
704
+ linear_units: 4096
705
+ positionwise_layer_type: linear
706
+ use_ffn: true
707
+ macaron_ffn: true
708
+ merge_conv_kernel: 31
709
+ prepostencoder: linear
710
+ prepostencoder_conf:
711
+ input_size: 1024
712
+ output_size: 80
713
+ postencoder: conformer_full
714
+ postencoder_conf:
715
+ output_size: 256
716
+ attention_heads: 4
717
+ linear_units: 1024
718
+ num_blocks: 12
719
+ dropout_rate: 0.1
720
+ positional_dropout_rate: 0.2
721
+ attention_dropout_rate: 0.1
722
+ input_layer: conv2d1
723
+ normalize_before: true
724
+ macaron_style: true
725
+ rel_pos_type: latest
726
+ pos_enc_layer_type: rel_pos
727
+ selfattention_layer_type: rel_selfattn
728
+ activation_type: swish
729
+ use_cnn_module: true
730
+ cnn_module_kernel: 31
731
+ deliberationencoder: null
732
+ deliberationencoder_conf: {}
733
+ decoder: transformer
734
+ decoder_conf:
735
+ attention_heads: 4
736
+ linear_units: 2048
737
+ num_blocks: 6
738
+ dropout_rate: 0.1
739
+ positional_dropout_rate: 0.1
740
+ self_attention_dropout_rate: 0.1
741
+ src_attention_dropout_rate: 0.1
742
+ postdecoder: null
743
+ postdecoder_conf: {}
744
+ required:
745
+ - output_dir
746
+ - token_list
747
+ version: '202310'
748
+ distributed: true
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/acc.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/backward_time.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/cer.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/clip.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/forward_time.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/gpu_max_cached_mem_GB.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/grad_norm.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/iter_time.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/loss.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/loss_att.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/loss_scale.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/optim0_lr0.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/optim_step_time.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/train_time.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/images/wer.png ADDED
exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/valid.acc.ave_10best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0aefdf647fd9bef1d3d957e27d8a6e561671e081106dd4a45fdd69e01342948b
3
+ size 2381810530
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202310'
2
+ files:
3
+ slu_model_file: exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/valid.acc.ave_10best.pth
4
+ python: "3.9.13 (main, Aug 25 2022, 23:26:10) \n[GCC 11.2.0]"
5
+ timestamp: 1715364701.23388
6
+ torch: 2.1.0+cu121
7
+ yaml_files:
8
+ slu_train_config: exp/slu_train_asr_owsm_weighted_dropout_raw_en_bpe500_sp/config.yaml