ayameRushia commited on
Commit
307620d
·
1 Parent(s): 366645e

Training in progress, step 1600

Browse files
.ipynb_checkpoints/run_speech_recognition_ctc-checkpoint.py CHANGED
@@ -439,17 +439,16 @@ def main():
439
 
440
  def remove_special_characters(batch):
441
  if chars_to_ignore_regex is not None:
442
- batch["target_text"] = re.sub(chars_to_ignore_regex, "", batch[text_column_name]).lower() + " "
 
 
 
 
 
 
 
443
  else:
444
- batch["target_text"] = batch[text_column_name].lower() + " "
445
- batch["target_text"] = re.sub('[â]', 'a', batch[text_column_name]).lower() + " "
446
- batch["target_text"] = re.sub('[î]', 'i', batch[text_column_name]).lower() + " "
447
- batch["target_text"] = re.sub('[ô]', 'o', batch[text_column_name]).lower() + " "
448
- batch["target_text"] = re.sub('[û]', 'u', batch[text_column_name]).lower() + " "
449
- batch["target_text"] = re.sub('!', '', batch[text_column_name]).lower() + " "
450
- batch["target_text"] = re.sub('[û]', 'u', batch[text_column_name]).lower() + " "
451
- batch["target_text"] = batch[text_column_name].replace('\"',"").replace("&","").replace("'","").replace("(","").lower() + " "
452
- batch["target_text"] = batch[text_column_name].replace('[',"").replace("]","").replace("\\","").replace(")","").lower() + " "
453
  return batch
454
 
455
  with training_args.main_process_first(desc="dataset map special characters removal"):
 
439
 
440
  def remove_special_characters(batch):
441
  if chars_to_ignore_regex is not None:
442
+ batch["sentence"] = re.sub(chars_to_ignore_regex, "", batch["sentence"]).lower() + " "
443
+ batch["sentence"] = re.sub('!', '', batch["sentence"]).lower() + " "
444
+ batch["sentence"] = batch["sentence"].replace('â', 'a').replace('á', 'a').replace('ō', 'o').lower() + " "
445
+ batch["sentence"] = batch["sentence"].replace('û', 'u').replace('ł', 'l').replace('é', 'e').lower() + " "
446
+ batch["sentence"] = batch["sentence"].replace('ń',"n").replace('î',"").replace('ô',"").replace('û',"").lower() + " "
447
+ batch["sentence"] = batch["sentence"].replace('\"',"").replace("&","").replace("'","").replace("(","").lower() + " "
448
+ batch["sentence"] = batch["sentence"].replace('[',"").replace("]","").replace("\\","").replace(")","").lower() + " "
449
+ batch["sentence"] = batch["sentence"].replace(" "," ").replace(" "," ").replace(" "," ").lower() + " "
450
  else:
451
+ batch["sentence"] = batch["sentence"].lower() + " "
 
 
 
 
 
 
 
 
452
  return batch
453
 
454
  with training_args.main_process_first(desc="dataset map special characters removal"):
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b491618ba7f98aa5aaa423d998ee9b9ba88ee3b6e91a5f322e641d6e22f6f33d
3
  size 1262091761
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2da86a14f166346a4c719264af09e51aa06466d45a74e9e6ecc06aea1d14528
3
  size 1262091761
run_speech_recognition_ctc.py CHANGED
@@ -439,17 +439,16 @@ def main():
439
 
440
  def remove_special_characters(batch):
441
  if chars_to_ignore_regex is not None:
442
- batch["target_text"] = re.sub(chars_to_ignore_regex, "", batch[text_column_name]).lower() + " "
 
 
 
 
 
 
 
443
  else:
444
- batch["target_text"] = batch[text_column_name].lower() + " "
445
- batch["target_text"] = re.sub('[â]', 'a', batch[text_column_name]).lower() + " "
446
- batch["target_text"] = re.sub('[î]', 'i', batch[text_column_name]).lower() + " "
447
- batch["target_text"] = re.sub('[ô]', 'o', batch[text_column_name]).lower() + " "
448
- batch["target_text"] = re.sub('[û]', 'u', batch[text_column_name]).lower() + " "
449
- batch["target_text"] = re.sub('!', '', batch[text_column_name]).lower() + " "
450
- batch["target_text"] = re.sub('[û]', 'u', batch[text_column_name]).lower() + " "
451
- batch["target_text"] = batch[text_column_name].replace('\"',"").replace("&","").replace("'","").replace("(","").lower() + " "
452
- batch["target_text"] = batch[text_column_name].replace('[',"").replace("]","").replace("\\","").replace(")","").lower() + " "
453
  return batch
454
 
455
  with training_args.main_process_first(desc="dataset map special characters removal"):
 
439
 
440
  def remove_special_characters(batch):
441
  if chars_to_ignore_regex is not None:
442
+ batch["sentence"] = re.sub(chars_to_ignore_regex, "", batch["sentence"]).lower() + " "
443
+ batch["sentence"] = re.sub('!', '', batch["sentence"]).lower() + " "
444
+ batch["sentence"] = batch["sentence"].replace('â', 'a').replace('á', 'a').replace('ō', 'o').lower() + " "
445
+ batch["sentence"] = batch["sentence"].replace('û', 'u').replace('ł', 'l').replace('é', 'e').lower() + " "
446
+ batch["sentence"] = batch["sentence"].replace('ń',"n").replace('î',"").replace('ô',"").replace('û',"").lower() + " "
447
+ batch["sentence"] = batch["sentence"].replace('\"',"").replace("&","").replace("'","").replace("(","").lower() + " "
448
+ batch["sentence"] = batch["sentence"].replace('[',"").replace("]","").replace("\\","").replace(")","").lower() + " "
449
+ batch["sentence"] = batch["sentence"].replace(" "," ").replace(" "," ").replace(" "," ").lower() + " "
450
  else:
451
+ batch["sentence"] = batch["sentence"].lower() + " "
 
 
 
 
 
 
 
 
452
  return batch
453
 
454
  with training_args.main_process_first(desc="dataset map special characters removal"):
wandb/run-20220130_033240-1ciqu2rb/files/output.log CHANGED
@@ -2288,3 +2288,765 @@ Configuration saved in ./checkpoint-1200/config.json
2288
  {'eval_loss': 0.3779752850532532, 'eval_wer': 0.3348087406233984, 'eval_runtime': 176.2153, 'eval_samples_per_second': 20.475, 'eval_steps_per_second': 2.559, 'epoch': 9.3}
2289
  Model weights saved in ./checkpoint-1200/pytorch_model.bin
2290
  Configuration saved in ./checkpoint-1200/preprocessor_config.json
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2288
  {'eval_loss': 0.3779752850532532, 'eval_wer': 0.3348087406233984, 'eval_runtime': 176.2153, 'eval_samples_per_second': 20.475, 'eval_steps_per_second': 2.559, 'epoch': 9.3}
2289
  Model weights saved in ./checkpoint-1200/pytorch_model.bin
2290
  Configuration saved in ./checkpoint-1200/preprocessor_config.json
2291
+ Configuration saved in ./preprocessor_config.json
2292
+ Deleting older checkpoint [checkpoint-400] due to args.save_total_limit
2293
+
2294
+
2295
+
2296
+
2297
+
2298
+
2299
+
2300
+
2301
+
2302
+
2303
+
2304
+
2305
+
2306
+
2307
+
2308
+
2309
+
2310
+
2311
+
2312
+
2313
+
2314
+
2315
+
2316
+
2317
+
2318
+
2319
+
2320
+
2321
+
2322
+
2323
+
2324
+
2325
+
2326
+
2327
+
2328
+
2329
+
2330
+
2331
+
2332
+
2333
+
2334
+
2335
+
2336
+
2337
+
2338
+
2339
+
2340
+
2341
+
2342
+
2343
+
2344
+
2345
+
2346
+
2347
+
2348
+
2349
+
2350
+
2351
+
2352
+
2353
+
2354
+
2355
+
2356
+
2357
+
2358
+
2359
+
2360
+
2361
+
2362
+
2363
+
2364
+
2365
+
2366
+
2367
+
2368
+
2369
+
2370
+
2371
+
2372
+
2373
+
2374
+
2375
+
2376
+
2377
+
2378
+
2379
+
2380
+
2381
+
2382
+
2383
+
2384
+
2385
+
2386
+
2387
+
2388
+
2389
+
2390
+
2391
+ 34%|█████████████████████████████████████▉ | 1300/3870 [2:02:31<3:12:24, 4.49s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.
2392
+ ***** Running Evaluation *****
2393
+ Num examples = 3608
2394
+ Batch size = 8
2395
+
2396
+
2397
+
2398
+
2399
+
2400
+
2401
+
2402
+
2403
+
2404
+
2405
+
2406
+
2407
+
2408
+
2409
+
2410
+
2411
+
2412
+
2413
+
2414
+
2415
+
2416
+
2417
+
2418
+
2419
+
2420
+
2421
+
2422
+
2423
+
2424
+
2425
+
2426
+
2427
+
2428
+
2429
+
2430
+
2431
+
2432
+
2433
+
2434
+
2435
+
2436
+
2437
+
2438
+
2439
+
2440
+
2441
+
2442
+
2443
+
2444
+
2445
+
2446
+
2447
+
2448
+
2449
+
2450
+
2451
+
2452
+
2453
+
2454
+
2455
+
2456
+
2457
+
2458
+
2459
+
2460
+
2461
+
2462
+
2463
+
2464
+
2465
+
2466
+
2467
+
2468
+
2469
+
2470
+
2471
+
2472
+
2473
+
2474
+
2475
+
2476
+
2477
+
2478
+ 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 451/451 [02:50<00:00, 2.53it/s]
2479
+
2480
+
2481
+
2482
+
2483
+
2484
+
2485
+
2486
+
2487
+
2488
+
2489
+
2490
+
2491
+
2492
+
2493
+
2494
+
2495
+
2496
+
2497
+
2498
+
2499
+
2500
+
2501
+
2502
+
2503
+
2504
+
2505
+
2506
+
2507
+
2508
+
2509
+
2510
+
2511
+
2512
+
2513
+
2514
+
2515
+
2516
+
2517
+
2518
+
2519
+
2520
+
2521
+
2522
+
2523
+
2524
+
2525
+
2526
+
2527
+
2528
+
2529
+
2530
+
2531
+
2532
+
2533
+
2534
+
2535
+
2536
+
2537
+
2538
+
2539
+
2540
+
2541
+
2542
+
2543
+
2544
+
2545
+
2546
+
2547
+
2548
+
2549
+
2550
+
2551
+
2552
+
2553
+
2554
+
2555
+
2556
+
2557
+
2558
+
2559
+
2560
+
2561
+
2562
+
2563
+
2564
+
2565
+
2566
+
2567
+
2568
+
2569
+
2570
+
2571
+
2572
+
2573
+
2574
+
2575
+
2576
+
2577
+
2578
+
2579
+ 36%|████████████████████████████████████████▉ | 1400/3870 [2:11:49<2:48:02, 4.08s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.
2580
+ ***** Running Evaluation *****
2581
+ Num examples = 3608
2582
+ Batch size = 8
2583
+
2584
+
2585
+
2586
+
2587
+
2588
+
2589
+
2590
+
2591
+
2592
+
2593
+
2594
+
2595
+
2596
+
2597
+
2598
+
2599
+
2600
+
2601
+
2602
+
2603
+
2604
+
2605
+
2606
+
2607
+
2608
+
2609
+
2610
+
2611
+
2612
+
2613
+
2614
+
2615
+
2616
+
2617
+
2618
+
2619
+
2620
+
2621
+
2622
+
2623
+
2624
+
2625
+
2626
+
2627
+
2628
+
2629
+
2630
+
2631
+
2632
+
2633
+
2634
+
2635
+
2636
+
2637
+
2638
+
2639
+
2640
+
2641
+
2642
+
2643
+
2644
+
2645
+
2646
+
2647
+
2648
+
2649
+
2650
+
2651
+
2652
+
2653
+
2654
+
2655
+
2656
+
2657
+
2658
+
2659
+
2660
+
2661
+
2662
+
2663
+
2664
+
2665
+
2666
+
2667
+ 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 451/451 [02:51<00:00, 2.53it/s]
2668
+
2669
+
2670
+
2671
+
2672
+
2673
+
2674
+
2675
+
2676
+
2677
+
2678
+
2679
+
2680
+
2681
+
2682
+
2683
+
2684
+
2685
+
2686
+
2687
+
2688
+
2689
+
2690
+
2691
+
2692
+
2693
+
2694
+
2695
+
2696
+
2697
+
2698
+
2699
+
2700
+
2701
+
2702
+
2703
+
2704
+
2705
+
2706
+
2707
+
2708
+
2709
+
2710
+
2711
+
2712
+
2713
+
2714
+
2715
+
2716
+
2717
+
2718
+
2719
+
2720
+
2721
+
2722
+
2723
+
2724
+
2725
+
2726
+
2727
+
2728
+
2729
+
2730
+
2731
+
2732
+
2733
+
2734
+
2735
+
2736
+
2737
+
2738
+
2739
+
2740
+
2741
+
2742
+
2743
+
2744
+
2745
+
2746
+
2747
+
2748
+
2749
+
2750
+
2751
+
2752
+
2753
+
2754
+
2755
+
2756
+
2757
+
2758
+
2759
+
2760
+
2761
+
2762
+
2763
+
2764
+
2765
+
2766
+
2767
+
2768
+ 39%|███████████████████████████████████████████▊ | 1499/3870 [2:21:03<2:35:27, 3.93s/it]
2769
+ 39%|███████████████████████████████████████████▊ | 1500/3870 [2:21:07<2:34:01, 3.90s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.
2770
+ ***** Running Evaluation *****
2771
+ Num examples = 3608
2772
+ Batch size = 8
2773
+
2774
+
2775
+
2776
+
2777
+
2778
+
2779
+
2780
+
2781
+
2782
+
2783
+
2784
+
2785
+
2786
+
2787
+
2788
+
2789
+
2790
+
2791
+
2792
+
2793
+
2794
+
2795
+
2796
+
2797
+
2798
+
2799
+
2800
+
2801
+
2802
+
2803
+
2804
+
2805
+
2806
+
2807
+
2808
+
2809
+
2810
+
2811
+
2812
+
2813
+
2814
+
2815
+
2816
+
2817
+
2818
+
2819
+
2820
+
2821
+
2822
+
2823
+
2824
+
2825
+
2826
+
2827
+
2828
+
2829
+
2830
+
2831
+
2832
+
2833
+
2834
+
2835
+
2836
+
2837
+
2838
+
2839
+
2840
+
2841
+
2842
+
2843
+
2844
+
2845
+
2846
+
2847
+
2848
+
2849
+
2850
+
2851
+
2852
+
2853
+
2854
+
2855
+
2856
+
2857
+
2858
+ 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████| 451/451 [02:51<00:00, 2.41it/s]
2859
+
2860
+
2861
+
2862
+
2863
+
2864
+
2865
+
2866
+
2867
+
2868
+
2869
+
2870
+
2871
+
2872
+
2873
+
2874
+
2875
+
2876
+
2877
+
2878
+
2879
+
2880
+
2881
+
2882
+
2883
+
2884
+
2885
+
2886
+
2887
+
2888
+
2889
+
2890
+
2891
+
2892
+
2893
+
2894
+
2895
+
2896
+
2897
+
2898
+
2899
+
2900
+
2901
+
2902
+
2903
+
2904
+
2905
+
2906
+
2907
+
2908
+
2909
+
2910
+
2911
+
2912
+
2913
+
2914
+
2915
+
2916
+
2917
+
2918
+
2919
+
2920
+
2921
+
2922
+
2923
+
2924
+
2925
+
2926
+
2927
+
2928
+
2929
+
2930
+
2931
+
2932
+
2933
+
2934
+
2935
+
2936
+
2937
+
2938
+
2939
+
2940
+
2941
+
2942
+
2943
+
2944
+
2945
+
2946
+
2947
+
2948
+
2949
+
2950
+
2951
+
2952
+
2953
+
2954
+
2955
+
2956
+
2957
+
2958
+
2959
+ 41%|██████████████████████████████████████████████▋ | 1600/3870 [2:30:27<2:17:07, 3.62s/it]The following columns in the evaluation set don't have a corresponding argument in `Wav2Vec2ForCTC.forward` and have been ignored: input_length.
2960
+ ***** Running Evaluation *****
2961
+ Num examples = 3608
2962
+ Batch size = 8
2963
+
2964
+
2965
+
2966
+
2967
+
2968
+
2969
+
2970
+
2971
+
2972
+
2973
+
2974
+
2975
+
2976
+
2977
+
2978
+
2979
+
2980
+
2981
+
2982
+
2983
+
2984
+
2985
+
2986
+
2987
+
2988
+
2989
+
2990
+
2991
+
2992
+
2993
+
2994
+
2995
+
2996
+
2997
+
2998
+
2999
+
3000
+
3001
+
3002
+
3003
+
3004
+
3005
+
3006
+
3007
+
3008
+
3009
+
3010
+
3011
+
3012
+
3013
+
3014
+
3015
+
3016
+
3017
+
3018
+
3019
+
3020
+
3021
+
3022
+
3023
+
3024
+
3025
+
3026
+
3027
+
3028
+
3029
+
3030
+
3031
+
3032
+
3033
+
3034
+
3035
+
3036
+
3037
+
3038
+
3039
+
3040
+
3041
+
3042
+
3043
+
3044
+
3045
+
3046
+
3047
+
3048
+
3049
+ Configuration saved in ./checkpoint-1600/config.json
3050
+ {'eval_loss': 0.3882218301296234, 'eval_wer': 0.3162186087685785, 'eval_runtime': 175.3702, 'eval_samples_per_second': 20.574, 'eval_steps_per_second': 2.572, 'epoch': 12.4}
3051
+ Model weights saved in ./checkpoint-1600/pytorch_model.bin
3052
+ Configuration saved in ./checkpoint-1600/preprocessor_config.json
wandb/run-20220130_033240-1ciqu2rb/files/wandb-summary.json CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220130_033240-1ciqu2rb/logs/debug-internal.log CHANGED
The diff for this file is too large to render. See raw diff
 
wandb/run-20220130_033240-1ciqu2rb/run-1ciqu2rb.wandb CHANGED
Binary files a/wandb/run-20220130_033240-1ciqu2rb/run-1ciqu2rb.wandb and b/wandb/run-20220130_033240-1ciqu2rb/run-1ciqu2rb.wandb differ