{ "best_metric": null, "best_model_checkpoint": null, "epoch": 25.0, "eval_steps": 500, "global_step": 10100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 1.0000000000000002e-06, "loss": 10.8557, "step": 50 }, { "epoch": 0.25, "learning_rate": 2.0000000000000003e-06, "loss": 10.5052, "step": 100 }, { "epoch": 0.37, "learning_rate": 3e-06, "loss": 9.5, "step": 150 }, { "epoch": 0.5, "learning_rate": 4.000000000000001e-06, "loss": 7.2099, "step": 200 }, { "epoch": 0.62, "learning_rate": 5e-06, "loss": 5.6515, "step": 250 }, { "epoch": 0.74, "learning_rate": 6e-06, "loss": 5.1371, "step": 300 }, { "epoch": 0.87, "learning_rate": 7e-06, "loss": 4.6222, "step": 350 }, { "epoch": 0.99, "learning_rate": 8.000000000000001e-06, "loss": 4.4197, "step": 400 }, { "epoch": 1.0, "eval_cer": 0.9960395255351591, "eval_loss": 4.07672643661499, "eval_runtime": 48.0448, "eval_samples_per_second": 134.541, "eval_steps_per_second": 8.409, "eval_wer": 0.9928168659986352, "step": 404 }, { "epoch": 1.11, "learning_rate": 9e-06, "loss": 4.178, "step": 450 }, { "epoch": 1.24, "learning_rate": 1e-05, "loss": 3.9224, "step": 500 }, { "epoch": 1.36, "learning_rate": 1.1000000000000001e-05, "loss": 3.7071, "step": 550 }, { "epoch": 1.49, "learning_rate": 1.2e-05, "loss": 3.5329, "step": 600 }, { "epoch": 1.61, "learning_rate": 1.3000000000000001e-05, "loss": 3.3935, "step": 650 }, { "epoch": 1.73, "learning_rate": 1.4e-05, "loss": 3.18, "step": 700 }, { "epoch": 1.86, "learning_rate": 1.5000000000000002e-05, "loss": 3.0269, "step": 750 }, { "epoch": 1.98, "learning_rate": 1.6000000000000003e-05, "loss": 2.8984, "step": 800 }, { "epoch": 2.0, "eval_cer": 0.9960395255351591, "eval_loss": 2.7950191497802734, "eval_runtime": 47.7431, "eval_samples_per_second": 135.391, "eval_steps_per_second": 8.462, "eval_wer": 0.9928168659986352, "step": 808 }, { "epoch": 2.1, "learning_rate": 1.7e-05, "loss": 2.7876, "step": 850 }, { "epoch": 2.23, "learning_rate": 1.8e-05, "loss": 2.7058, "step": 900 }, { "epoch": 2.35, "learning_rate": 1.9e-05, "loss": 2.6375, "step": 950 }, { "epoch": 2.48, "learning_rate": 2e-05, "loss": 2.5938, "step": 1000 }, { "epoch": 2.6, "learning_rate": 1.989010989010989e-05, "loss": 2.5391, "step": 1050 }, { "epoch": 2.72, "learning_rate": 1.9780219780219784e-05, "loss": 2.4169, "step": 1100 }, { "epoch": 2.85, "learning_rate": 1.967032967032967e-05, "loss": 2.264, "step": 1150 }, { "epoch": 2.97, "learning_rate": 1.9560439560439563e-05, "loss": 2.1179, "step": 1200 }, { "epoch": 3.0, "eval_cer": 0.9960395255351591, "eval_loss": 1.9177764654159546, "eval_runtime": 47.4792, "eval_samples_per_second": 136.144, "eval_steps_per_second": 8.509, "eval_wer": 0.9928168659986352, "step": 1212 }, { "epoch": 3.09, "learning_rate": 1.9450549450549452e-05, "loss": 2.0151, "step": 1250 }, { "epoch": 3.22, "learning_rate": 1.9340659340659342e-05, "loss": 1.9512, "step": 1300 }, { "epoch": 3.34, "learning_rate": 1.923076923076923e-05, "loss": 1.8304, "step": 1350 }, { "epoch": 3.47, "learning_rate": 1.9120879120879124e-05, "loss": 1.7452, "step": 1400 }, { "epoch": 3.59, "learning_rate": 1.901098901098901e-05, "loss": 1.6516, "step": 1450 }, { "epoch": 3.71, "learning_rate": 1.8901098901098903e-05, "loss": 1.5666, "step": 1500 }, { "epoch": 3.84, "learning_rate": 1.8791208791208793e-05, "loss": 1.5196, "step": 1550 }, { "epoch": 3.96, "learning_rate": 1.8681318681318682e-05, "loss": 1.4282, "step": 1600 }, { "epoch": 4.0, "eval_cer": 0.4791183983841264, "eval_loss": 1.0854228734970093, "eval_runtime": 47.4019, "eval_samples_per_second": 136.366, "eval_steps_per_second": 8.523, "eval_wer": 0.6262256222389828, "step": 1616 }, { "epoch": 4.08, "learning_rate": 1.8571428571428575e-05, "loss": 1.372, "step": 1650 }, { "epoch": 4.21, "learning_rate": 1.8461538461538465e-05, "loss": 1.3009, "step": 1700 }, { "epoch": 4.33, "learning_rate": 1.835164835164835e-05, "loss": 1.2673, "step": 1750 }, { "epoch": 4.46, "learning_rate": 1.8241758241758244e-05, "loss": 1.1968, "step": 1800 }, { "epoch": 4.58, "learning_rate": 1.8131868131868133e-05, "loss": 1.1639, "step": 1850 }, { "epoch": 4.7, "learning_rate": 1.8021978021978023e-05, "loss": 1.1464, "step": 1900 }, { "epoch": 4.83, "learning_rate": 1.7912087912087915e-05, "loss": 1.0984, "step": 1950 }, { "epoch": 4.95, "learning_rate": 1.78021978021978e-05, "loss": 1.0793, "step": 2000 }, { "epoch": 5.0, "eval_cer": 0.294362264599299, "eval_loss": 0.7671939134597778, "eval_runtime": 47.1803, "eval_samples_per_second": 137.006, "eval_steps_per_second": 8.563, "eval_wer": 0.49962288546492833, "step": 2020 }, { "epoch": 5.07, "learning_rate": 1.7692307692307694e-05, "loss": 1.0631, "step": 2050 }, { "epoch": 5.2, "learning_rate": 1.7582417582417584e-05, "loss": 1.0267, "step": 2100 }, { "epoch": 5.32, "learning_rate": 1.7472527472527473e-05, "loss": 1.0496, "step": 2150 }, { "epoch": 5.45, "learning_rate": 1.7362637362637363e-05, "loss": 0.9783, "step": 2200 }, { "epoch": 5.57, "learning_rate": 1.7252747252747256e-05, "loss": 0.9673, "step": 2250 }, { "epoch": 5.69, "learning_rate": 1.7142857142857142e-05, "loss": 0.9404, "step": 2300 }, { "epoch": 5.82, "learning_rate": 1.7032967032967035e-05, "loss": 0.9489, "step": 2350 }, { "epoch": 5.94, "learning_rate": 1.6923076923076924e-05, "loss": 0.9064, "step": 2400 }, { "epoch": 6.0, "eval_cer": 0.27366878552050533, "eval_loss": 0.6211672425270081, "eval_runtime": 47.5588, "eval_samples_per_second": 135.916, "eval_steps_per_second": 8.495, "eval_wer": 0.4573142261968897, "step": 2424 }, { "epoch": 6.06, "learning_rate": 1.6813186813186814e-05, "loss": 0.9205, "step": 2450 }, { "epoch": 6.19, "learning_rate": 1.6703296703296707e-05, "loss": 0.8951, "step": 2500 }, { "epoch": 6.31, "learning_rate": 1.6593406593406596e-05, "loss": 0.8795, "step": 2550 }, { "epoch": 6.44, "learning_rate": 1.6483516483516486e-05, "loss": 0.8895, "step": 2600 }, { "epoch": 6.56, "learning_rate": 1.6373626373626375e-05, "loss": 0.8843, "step": 2650 }, { "epoch": 6.68, "learning_rate": 1.6263736263736265e-05, "loss": 0.8283, "step": 2700 }, { "epoch": 6.81, "learning_rate": 1.6153846153846154e-05, "loss": 0.8294, "step": 2750 }, { "epoch": 6.93, "learning_rate": 1.6043956043956047e-05, "loss": 0.8366, "step": 2800 }, { "epoch": 7.0, "eval_cer": 0.24499495039505734, "eval_loss": 0.5246793627738953, "eval_runtime": 47.4984, "eval_samples_per_second": 136.089, "eval_steps_per_second": 8.506, "eval_wer": 0.413173867758503, "step": 2828 }, { "epoch": 7.05, "learning_rate": 1.5934065934065933e-05, "loss": 0.8077, "step": 2850 }, { "epoch": 7.18, "learning_rate": 1.5824175824175826e-05, "loss": 0.8373, "step": 2900 }, { "epoch": 7.3, "learning_rate": 1.5714285714285715e-05, "loss": 0.7896, "step": 2950 }, { "epoch": 7.43, "learning_rate": 1.5604395604395605e-05, "loss": 0.7666, "step": 3000 }, { "epoch": 7.55, "learning_rate": 1.5494505494505498e-05, "loss": 0.7846, "step": 3050 }, { "epoch": 7.67, "learning_rate": 1.5384615384615387e-05, "loss": 0.7748, "step": 3100 }, { "epoch": 7.8, "learning_rate": 1.5274725274725277e-05, "loss": 0.7596, "step": 3150 }, { "epoch": 7.92, "learning_rate": 1.5164835164835166e-05, "loss": 0.7425, "step": 3200 }, { "epoch": 8.0, "eval_cer": 0.2257272421236064, "eval_loss": 0.45023345947265625, "eval_runtime": 47.1685, "eval_samples_per_second": 137.041, "eval_steps_per_second": 8.565, "eval_wer": 0.3785511618719247, "step": 3232 }, { "epoch": 8.04, "learning_rate": 1.5054945054945056e-05, "loss": 0.7275, "step": 3250 }, { "epoch": 8.17, "learning_rate": 1.4945054945054947e-05, "loss": 0.7249, "step": 3300 }, { "epoch": 8.29, "learning_rate": 1.4835164835164836e-05, "loss": 0.7246, "step": 3350 }, { "epoch": 8.42, "learning_rate": 1.4725274725274727e-05, "loss": 0.7058, "step": 3400 }, { "epoch": 8.54, "learning_rate": 1.4615384615384615e-05, "loss": 0.7114, "step": 3450 }, { "epoch": 8.66, "learning_rate": 1.4505494505494506e-05, "loss": 0.7049, "step": 3500 }, { "epoch": 8.79, "learning_rate": 1.4395604395604396e-05, "loss": 0.674, "step": 3550 }, { "epoch": 8.91, "learning_rate": 1.4285714285714287e-05, "loss": 0.7017, "step": 3600 }, { "epoch": 9.0, "eval_cer": 0.2082021426166855, "eval_loss": 0.39116355776786804, "eval_runtime": 47.8558, "eval_samples_per_second": 135.072, "eval_steps_per_second": 8.442, "eval_wer": 0.3509320116366771, "step": 3636 }, { "epoch": 9.03, "learning_rate": 1.4175824175824178e-05, "loss": 0.6818, "step": 3650 }, { "epoch": 9.16, "learning_rate": 1.4065934065934068e-05, "loss": 0.6718, "step": 3700 }, { "epoch": 9.28, "learning_rate": 1.3956043956043957e-05, "loss": 0.6451, "step": 3750 }, { "epoch": 9.41, "learning_rate": 1.3846153846153847e-05, "loss": 0.6496, "step": 3800 }, { "epoch": 9.53, "learning_rate": 1.3736263736263738e-05, "loss": 0.6437, "step": 3850 }, { "epoch": 9.65, "learning_rate": 1.3626373626373627e-05, "loss": 0.6289, "step": 3900 }, { "epoch": 9.78, "learning_rate": 1.3516483516483519e-05, "loss": 0.6422, "step": 3950 }, { "epoch": 9.9, "learning_rate": 1.3406593406593406e-05, "loss": 0.6275, "step": 4000 }, { "epoch": 10.0, "eval_cer": 0.19792471138042336, "eval_loss": 0.3406831622123718, "eval_runtime": 47.4155, "eval_samples_per_second": 136.327, "eval_steps_per_second": 8.52, "eval_wer": 0.332794598283231, "step": 4040 }, { "epoch": 10.02, "learning_rate": 1.3296703296703298e-05, "loss": 0.6054, "step": 4050 }, { "epoch": 10.15, "learning_rate": 1.3186813186813187e-05, "loss": 0.6368, "step": 4100 }, { "epoch": 10.27, "learning_rate": 1.3076923076923078e-05, "loss": 0.6517, "step": 4150 }, { "epoch": 10.4, "learning_rate": 1.296703296703297e-05, "loss": 0.5973, "step": 4200 }, { "epoch": 10.52, "learning_rate": 1.2857142857142859e-05, "loss": 0.6143, "step": 4250 }, { "epoch": 10.64, "learning_rate": 1.2747252747252747e-05, "loss": 0.6007, "step": 4300 }, { "epoch": 10.77, "learning_rate": 1.2637362637362638e-05, "loss": 0.6099, "step": 4350 }, { "epoch": 10.89, "learning_rate": 1.2527472527472529e-05, "loss": 0.5853, "step": 4400 }, { "epoch": 11.0, "eval_cer": 0.19198399968316204, "eval_loss": 0.3044804036617279, "eval_runtime": 47.1639, "eval_samples_per_second": 137.054, "eval_steps_per_second": 8.566, "eval_wer": 0.32263046367129977, "step": 4444 }, { "epoch": 11.01, "learning_rate": 1.2417582417582419e-05, "loss": 0.5824, "step": 4450 }, { "epoch": 11.14, "learning_rate": 1.230769230769231e-05, "loss": 0.5675, "step": 4500 }, { "epoch": 11.26, "learning_rate": 1.21978021978022e-05, "loss": 0.5906, "step": 4550 }, { "epoch": 11.39, "learning_rate": 1.2087912087912089e-05, "loss": 0.6308, "step": 4600 }, { "epoch": 11.51, "learning_rate": 1.1978021978021978e-05, "loss": 0.5545, "step": 4650 }, { "epoch": 11.63, "learning_rate": 1.186813186813187e-05, "loss": 0.5603, "step": 4700 }, { "epoch": 11.76, "learning_rate": 1.1758241758241759e-05, "loss": 0.54, "step": 4750 }, { "epoch": 11.88, "learning_rate": 1.164835164835165e-05, "loss": 0.5551, "step": 4800 }, { "epoch": 12.0, "eval_cer": 0.1864789401770332, "eval_loss": 0.2657150328159332, "eval_runtime": 47.4966, "eval_samples_per_second": 136.094, "eval_steps_per_second": 8.506, "eval_wer": 0.31386704018963474, "step": 4848 }, { "epoch": 12.0, "learning_rate": 1.1538461538461538e-05, "loss": 0.5421, "step": 4850 }, { "epoch": 12.13, "learning_rate": 1.1428571428571429e-05, "loss": 0.5352, "step": 4900 }, { "epoch": 12.25, "learning_rate": 1.131868131868132e-05, "loss": 0.5306, "step": 4950 }, { "epoch": 12.38, "learning_rate": 1.120879120879121e-05, "loss": 0.5574, "step": 5000 }, { "epoch": 12.5, "learning_rate": 1.1098901098901101e-05, "loss": 0.555, "step": 5050 }, { "epoch": 12.62, "learning_rate": 1.098901098901099e-05, "loss": 0.5331, "step": 5100 }, { "epoch": 12.75, "learning_rate": 1.087912087912088e-05, "loss": 0.5215, "step": 5150 }, { "epoch": 12.87, "learning_rate": 1.076923076923077e-05, "loss": 0.5225, "step": 5200 }, { "epoch": 13.0, "learning_rate": 1.065934065934066e-05, "loss": 0.5105, "step": 5250 }, { "epoch": 13.0, "eval_cer": 0.18267688469078597, "eval_loss": 0.24552097916603088, "eval_runtime": 47.4722, "eval_samples_per_second": 136.164, "eval_steps_per_second": 8.51, "eval_wer": 0.30855152102862476, "step": 5252 }, { "epoch": 13.12, "learning_rate": 1.054945054945055e-05, "loss": 0.5578, "step": 5300 }, { "epoch": 13.24, "learning_rate": 1.0439560439560441e-05, "loss": 0.5168, "step": 5350 }, { "epoch": 13.37, "learning_rate": 1.0329670329670332e-05, "loss": 0.5363, "step": 5400 }, { "epoch": 13.49, "learning_rate": 1.021978021978022e-05, "loss": 0.512, "step": 5450 }, { "epoch": 13.61, "learning_rate": 1.010989010989011e-05, "loss": 0.5085, "step": 5500 }, { "epoch": 13.74, "learning_rate": 1e-05, "loss": 0.4896, "step": 5550 }, { "epoch": 13.86, "learning_rate": 9.890109890109892e-06, "loss": 0.5197, "step": 5600 }, { "epoch": 13.99, "learning_rate": 9.780219780219781e-06, "loss": 0.5073, "step": 5650 }, { "epoch": 14.0, "eval_cer": 0.18317194399889106, "eval_loss": 0.23889391124248505, "eval_runtime": 47.6163, "eval_samples_per_second": 135.752, "eval_steps_per_second": 8.484, "eval_wer": 0.3091620874187408, "step": 5656 }, { "epoch": 14.11, "learning_rate": 9.670329670329671e-06, "loss": 0.4856, "step": 5700 }, { "epoch": 14.23, "learning_rate": 9.560439560439562e-06, "loss": 0.5116, "step": 5750 }, { "epoch": 14.36, "learning_rate": 9.450549450549452e-06, "loss": 0.4966, "step": 5800 }, { "epoch": 14.48, "learning_rate": 9.340659340659341e-06, "loss": 0.4882, "step": 5850 }, { "epoch": 14.6, "learning_rate": 9.230769230769232e-06, "loss": 0.4852, "step": 5900 }, { "epoch": 14.73, "learning_rate": 9.120879120879122e-06, "loss": 0.4962, "step": 5950 }, { "epoch": 14.85, "learning_rate": 9.010989010989011e-06, "loss": 0.4839, "step": 6000 }, { "epoch": 14.98, "learning_rate": 8.9010989010989e-06, "loss": 0.4722, "step": 6050 }, { "epoch": 15.0, "eval_cer": 0.17808273431157054, "eval_loss": 0.2169932723045349, "eval_runtime": 46.7989, "eval_samples_per_second": 138.123, "eval_steps_per_second": 8.633, "eval_wer": 0.30298459217756707, "step": 6060 }, { "epoch": 15.1, "learning_rate": 8.791208791208792e-06, "loss": 0.4853, "step": 6100 }, { "epoch": 15.22, "learning_rate": 8.681318681318681e-06, "loss": 0.507, "step": 6150 }, { "epoch": 15.35, "learning_rate": 8.571428571428571e-06, "loss": 0.4966, "step": 6200 }, { "epoch": 15.47, "learning_rate": 8.461538461538462e-06, "loss": 0.5092, "step": 6250 }, { "epoch": 15.59, "learning_rate": 8.351648351648353e-06, "loss": 0.4625, "step": 6300 }, { "epoch": 15.72, "learning_rate": 8.241758241758243e-06, "loss": 0.4779, "step": 6350 }, { "epoch": 15.84, "learning_rate": 8.131868131868132e-06, "loss": 0.4631, "step": 6400 }, { "epoch": 15.97, "learning_rate": 8.021978021978023e-06, "loss": 0.481, "step": 6450 }, { "epoch": 16.0, "eval_cer": 0.17830056040713677, "eval_loss": 0.20887333154678345, "eval_runtime": 47.3142, "eval_samples_per_second": 136.619, "eval_steps_per_second": 8.539, "eval_wer": 0.3022662787774306, "step": 6464 }, { "epoch": 16.09, "learning_rate": 7.912087912087913e-06, "loss": 0.4574, "step": 6500 }, { "epoch": 16.21, "learning_rate": 7.802197802197802e-06, "loss": 0.4749, "step": 6550 }, { "epoch": 16.34, "learning_rate": 7.692307692307694e-06, "loss": 0.466, "step": 6600 }, { "epoch": 16.46, "learning_rate": 7.582417582417583e-06, "loss": 0.4489, "step": 6650 }, { "epoch": 16.58, "learning_rate": 7.472527472527473e-06, "loss": 0.4841, "step": 6700 }, { "epoch": 16.71, "learning_rate": 7.362637362637364e-06, "loss": 0.4736, "step": 6750 }, { "epoch": 16.83, "learning_rate": 7.252747252747253e-06, "loss": 0.4453, "step": 6800 }, { "epoch": 16.96, "learning_rate": 7.1428571428571436e-06, "loss": 0.4738, "step": 6850 }, { "epoch": 17.0, "eval_cer": 0.17634012554704054, "eval_loss": 0.20021960139274597, "eval_runtime": 46.7477, "eval_samples_per_second": 138.274, "eval_steps_per_second": 8.642, "eval_wer": 0.3004345796070826, "step": 6868 }, { "epoch": 17.08, "learning_rate": 7.032967032967034e-06, "loss": 0.4469, "step": 6900 }, { "epoch": 17.2, "learning_rate": 6.923076923076923e-06, "loss": 0.4408, "step": 6950 }, { "epoch": 17.33, "learning_rate": 6.813186813186814e-06, "loss": 0.4471, "step": 7000 }, { "epoch": 17.45, "learning_rate": 6.703296703296703e-06, "loss": 0.4468, "step": 7050 }, { "epoch": 17.57, "learning_rate": 6.5934065934065935e-06, "loss": 0.4348, "step": 7100 }, { "epoch": 17.7, "learning_rate": 6.483516483516485e-06, "loss": 0.433, "step": 7150 }, { "epoch": 17.82, "learning_rate": 6.373626373626373e-06, "loss": 0.4479, "step": 7200 }, { "epoch": 17.95, "learning_rate": 6.2637362637362645e-06, "loss": 0.4518, "step": 7250 }, { "epoch": 18.0, "eval_cer": 0.17651834689795837, "eval_loss": 0.19896449148654938, "eval_runtime": 46.712, "eval_samples_per_second": 138.38, "eval_steps_per_second": 8.649, "eval_wer": 0.3006141579571167, "step": 7272 }, { "epoch": 18.07, "learning_rate": 6.153846153846155e-06, "loss": 0.4314, "step": 7300 }, { "epoch": 18.19, "learning_rate": 6.043956043956044e-06, "loss": 0.4413, "step": 7350 }, { "epoch": 18.32, "learning_rate": 5.934065934065935e-06, "loss": 0.4276, "step": 7400 }, { "epoch": 18.44, "learning_rate": 5.824175824175825e-06, "loss": 0.4326, "step": 7450 }, { "epoch": 18.56, "learning_rate": 5.7142857142857145e-06, "loss": 0.4662, "step": 7500 }, { "epoch": 18.69, "learning_rate": 5.604395604395605e-06, "loss": 0.4266, "step": 7550 }, { "epoch": 18.81, "learning_rate": 5.494505494505495e-06, "loss": 0.4281, "step": 7600 }, { "epoch": 18.94, "learning_rate": 5.384615384615385e-06, "loss": 0.4402, "step": 7650 }, { "epoch": 19.0, "eval_cer": 0.17635992791936475, "eval_loss": 0.19003288447856903, "eval_runtime": 47.6013, "eval_samples_per_second": 135.795, "eval_steps_per_second": 8.487, "eval_wer": 0.2998599288869734, "step": 7676 }, { "epoch": 19.06, "learning_rate": 5.274725274725275e-06, "loss": 0.427, "step": 7700 }, { "epoch": 19.18, "learning_rate": 5.164835164835166e-06, "loss": 0.4166, "step": 7750 }, { "epoch": 19.31, "learning_rate": 5.054945054945055e-06, "loss": 0.4612, "step": 7800 }, { "epoch": 19.43, "learning_rate": 4.945054945054946e-06, "loss": 0.4286, "step": 7850 }, { "epoch": 19.55, "learning_rate": 4.8351648351648355e-06, "loss": 0.4271, "step": 7900 }, { "epoch": 19.68, "learning_rate": 4.725274725274726e-06, "loss": 0.4215, "step": 7950 }, { "epoch": 19.8, "learning_rate": 4.615384615384616e-06, "loss": 0.4486, "step": 8000 }, { "epoch": 19.93, "learning_rate": 4.505494505494506e-06, "loss": 0.4387, "step": 8050 }, { "epoch": 20.0, "eval_cer": 0.17396384086813602, "eval_loss": 0.18259279429912567, "eval_runtime": 47.0879, "eval_samples_per_second": 137.275, "eval_steps_per_second": 8.58, "eval_wer": 0.29695075961642065, "step": 8080 }, { "epoch": 20.05, "learning_rate": 4.395604395604396e-06, "loss": 0.4323, "step": 8100 }, { "epoch": 20.17, "learning_rate": 4.2857142857142855e-06, "loss": 0.4124, "step": 8150 }, { "epoch": 20.3, "learning_rate": 4.175824175824177e-06, "loss": 0.4112, "step": 8200 }, { "epoch": 20.42, "learning_rate": 4.065934065934066e-06, "loss": 0.4347, "step": 8250 }, { "epoch": 20.54, "learning_rate": 3.9560439560439565e-06, "loss": 0.4031, "step": 8300 }, { "epoch": 20.67, "learning_rate": 3.846153846153847e-06, "loss": 0.4122, "step": 8350 }, { "epoch": 20.79, "learning_rate": 3.7362637362637367e-06, "loss": 0.4125, "step": 8400 }, { "epoch": 20.92, "learning_rate": 3.6263736263736266e-06, "loss": 0.4212, "step": 8450 }, { "epoch": 21.0, "eval_cer": 0.17331036258143726, "eval_loss": 0.17667880654335022, "eval_runtime": 47.2671, "eval_samples_per_second": 136.755, "eval_steps_per_second": 8.547, "eval_wer": 0.29547821714614086, "step": 8484 }, { "epoch": 21.04, "learning_rate": 3.516483516483517e-06, "loss": 0.4213, "step": 8500 }, { "epoch": 21.16, "learning_rate": 3.406593406593407e-06, "loss": 0.4322, "step": 8550 }, { "epoch": 21.29, "learning_rate": 3.2967032967032968e-06, "loss": 0.4116, "step": 8600 }, { "epoch": 21.41, "learning_rate": 3.1868131868131867e-06, "loss": 0.404, "step": 8650 }, { "epoch": 21.53, "learning_rate": 3.0769230769230774e-06, "loss": 0.4016, "step": 8700 }, { "epoch": 21.66, "learning_rate": 2.9670329670329673e-06, "loss": 0.4031, "step": 8750 }, { "epoch": 21.78, "learning_rate": 2.8571428571428573e-06, "loss": 0.4072, "step": 8800 }, { "epoch": 21.91, "learning_rate": 2.7472527472527476e-06, "loss": 0.3893, "step": 8850 }, { "epoch": 22.0, "eval_cer": 0.17188459177409454, "eval_loss": 0.17065875232219696, "eval_runtime": 47.0103, "eval_samples_per_second": 137.502, "eval_steps_per_second": 8.594, "eval_wer": 0.2937183493158065, "step": 8888 }, { "epoch": 22.03, "learning_rate": 2.6373626373626375e-06, "loss": 0.4191, "step": 8900 }, { "epoch": 22.15, "learning_rate": 2.5274725274725274e-06, "loss": 0.4094, "step": 8950 }, { "epoch": 22.28, "learning_rate": 2.4175824175824177e-06, "loss": 0.4275, "step": 9000 }, { "epoch": 22.4, "learning_rate": 2.307692307692308e-06, "loss": 0.4044, "step": 9050 }, { "epoch": 22.52, "learning_rate": 2.197802197802198e-06, "loss": 0.4023, "step": 9100 }, { "epoch": 22.65, "learning_rate": 2.0879120879120883e-06, "loss": 0.3937, "step": 9150 }, { "epoch": 22.77, "learning_rate": 1.9780219780219782e-06, "loss": 0.4118, "step": 9200 }, { "epoch": 22.9, "learning_rate": 1.8681318681318684e-06, "loss": 0.4055, "step": 9250 }, { "epoch": 23.0, "eval_cer": 0.17228063922057862, "eval_loss": 0.17037759721279144, "eval_runtime": 47.8098, "eval_samples_per_second": 135.202, "eval_steps_per_second": 8.45, "eval_wer": 0.2942930000359157, "step": 9292 }, { "epoch": 23.02, "learning_rate": 1.7582417582417585e-06, "loss": 0.3926, "step": 9300 }, { "epoch": 23.14, "learning_rate": 1.6483516483516484e-06, "loss": 0.4367, "step": 9350 }, { "epoch": 23.27, "learning_rate": 1.5384615384615387e-06, "loss": 0.4156, "step": 9400 }, { "epoch": 23.39, "learning_rate": 1.4285714285714286e-06, "loss": 0.3998, "step": 9450 }, { "epoch": 23.51, "learning_rate": 1.3186813186813187e-06, "loss": 0.4496, "step": 9500 }, { "epoch": 23.64, "learning_rate": 1.2087912087912089e-06, "loss": 0.4061, "step": 9550 }, { "epoch": 23.76, "learning_rate": 1.098901098901099e-06, "loss": 0.3822, "step": 9600 }, { "epoch": 23.89, "learning_rate": 9.890109890109891e-07, "loss": 0.394, "step": 9650 }, { "epoch": 24.0, "eval_cer": 0.17158755618923147, "eval_loss": 0.16843372583389282, "eval_runtime": 47.3805, "eval_samples_per_second": 136.427, "eval_steps_per_second": 8.527, "eval_wer": 0.29339510828574505, "step": 9696 }, { "epoch": 24.01, "learning_rate": 8.791208791208792e-07, "loss": 0.4072, "step": 9700 }, { "epoch": 24.13, "learning_rate": 7.692307692307694e-07, "loss": 0.3867, "step": 9750 }, { "epoch": 24.26, "learning_rate": 6.593406593406594e-07, "loss": 0.4118, "step": 9800 }, { "epoch": 24.38, "learning_rate": 5.494505494505495e-07, "loss": 0.3978, "step": 9850 }, { "epoch": 24.5, "learning_rate": 4.395604395604396e-07, "loss": 0.4148, "step": 9900 }, { "epoch": 24.63, "learning_rate": 3.296703296703297e-07, "loss": 0.4074, "step": 9950 }, { "epoch": 24.75, "learning_rate": 2.197802197802198e-07, "loss": 0.4055, "step": 10000 }, { "epoch": 24.88, "learning_rate": 1.098901098901099e-07, "loss": 0.4309, "step": 10050 }, { "epoch": 25.0, "learning_rate": 0.0, "loss": 0.3997, "step": 10100 }, { "epoch": 25.0, "eval_cer": 0.17101328739182955, "eval_loss": 0.16850610077381134, "eval_runtime": 47.0414, "eval_samples_per_second": 137.411, "eval_steps_per_second": 8.588, "eval_wer": 0.29274862622562225, "step": 10100 }, { "epoch": 25.0, "step": 10100, "total_flos": 5.390210696735239e+18, "train_loss": 1.0812275544723662, "train_runtime": 21945.8534, "train_samples_per_second": 29.454, "train_steps_per_second": 0.46 } ], "logging_steps": 50, "max_steps": 10100, "num_train_epochs": 25, "save_steps": 500, "total_flos": 5.390210696735239e+18, "trial_name": null, "trial_params": null }