{ "best_metric": 0.44463480098223973, "best_model_checkpoint": "d:\\DataTicon\\Whisper-Khmer-Small\\whisper-khmer\\outputs\\whisper-base-khmer\\checkpoint-5000", "epoch": 2.995805871779509, "eval_steps": 1000, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005991611743559017, "grad_norm": 2.331707239151001, "learning_rate": 1e-05, "loss": 0.0943, "step": 10 }, { "epoch": 0.011983223487118035, "grad_norm": 1.523736596107483, "learning_rate": 2e-05, "loss": 0.0746, "step": 20 }, { "epoch": 0.017974835230677052, "grad_norm": 3.2414867877960205, "learning_rate": 3e-05, "loss": 0.0876, "step": 30 }, { "epoch": 0.02396644697423607, "grad_norm": 3.187774419784546, "learning_rate": 4e-05, "loss": 0.094, "step": 40 }, { "epoch": 0.029958058717795086, "grad_norm": 2.1969003677368164, "learning_rate": 5e-05, "loss": 0.0975, "step": 50 }, { "epoch": 0.035949670461354104, "grad_norm": 3.237760543823242, "learning_rate": 4.9899132539842645e-05, "loss": 0.1178, "step": 60 }, { "epoch": 0.041941282204913125, "grad_norm": 4.322758197784424, "learning_rate": 4.9798265079685293e-05, "loss": 0.1447, "step": 70 }, { "epoch": 0.04793289394847214, "grad_norm": 3.5539541244506836, "learning_rate": 4.9707484365543676e-05, "loss": 0.1321, "step": 80 }, { "epoch": 0.05392450569203116, "grad_norm": 2.4829070568084717, "learning_rate": 4.9606616905386325e-05, "loss": 0.1137, "step": 90 }, { "epoch": 0.05991611743559017, "grad_norm": 3.877744197845459, "learning_rate": 4.9505749445228974e-05, "loss": 0.1218, "step": 100 }, { "epoch": 0.0659077291791492, "grad_norm": 3.387650728225708, "learning_rate": 4.9404881985071616e-05, "loss": 0.1253, "step": 110 }, { "epoch": 0.07189934092270821, "grad_norm": 2.919637441635132, "learning_rate": 4.9304014524914265e-05, "loss": 0.1181, "step": 120 }, { "epoch": 0.07789095266626722, "grad_norm": 3.4895029067993164, "learning_rate": 4.920314706475691e-05, "loss": 0.1111, "step": 130 }, { "epoch": 0.08388256440982625, "grad_norm": 4.521254062652588, "learning_rate": 4.910227960459956e-05, "loss": 0.1314, "step": 140 }, { "epoch": 0.08987417615338526, "grad_norm": 3.0622236728668213, "learning_rate": 4.9001412144442205e-05, "loss": 0.1555, "step": 150 }, { "epoch": 0.09586578789694428, "grad_norm": 4.06223726272583, "learning_rate": 4.8900544684284854e-05, "loss": 0.1388, "step": 160 }, { "epoch": 0.10185739964050329, "grad_norm": 3.4601869583129883, "learning_rate": 4.8799677224127496e-05, "loss": 0.1491, "step": 170 }, { "epoch": 0.10784901138406232, "grad_norm": 5.160483360290527, "learning_rate": 4.8698809763970145e-05, "loss": 0.1523, "step": 180 }, { "epoch": 0.11384062312762133, "grad_norm": 4.109564781188965, "learning_rate": 4.8597942303812794e-05, "loss": 0.1391, "step": 190 }, { "epoch": 0.11983223487118035, "grad_norm": 2.8160319328308105, "learning_rate": 4.8497074843655436e-05, "loss": 0.1132, "step": 200 }, { "epoch": 0.12582384661473936, "grad_norm": 3.797750473022461, "learning_rate": 4.8396207383498085e-05, "loss": 0.1243, "step": 210 }, { "epoch": 0.1318154583582984, "grad_norm": 3.033773422241211, "learning_rate": 4.8295339923340734e-05, "loss": 0.1565, "step": 220 }, { "epoch": 0.1378070701018574, "grad_norm": 4.846673011779785, "learning_rate": 4.8194472463183376e-05, "loss": 0.1666, "step": 230 }, { "epoch": 0.14379868184541642, "grad_norm": 2.80188250541687, "learning_rate": 4.8093605003026025e-05, "loss": 0.1502, "step": 240 }, { "epoch": 0.14979029358897544, "grad_norm": 3.6848764419555664, "learning_rate": 4.7992737542868674e-05, "loss": 0.1459, "step": 250 }, { "epoch": 0.15578190533253444, "grad_norm": 2.692138195037842, "learning_rate": 4.789187008271132e-05, "loss": 0.1204, "step": 260 }, { "epoch": 0.16177351707609347, "grad_norm": 3.547896385192871, "learning_rate": 4.7791002622553965e-05, "loss": 0.1556, "step": 270 }, { "epoch": 0.1677651288196525, "grad_norm": 3.1380774974823, "learning_rate": 4.7690135162396614e-05, "loss": 0.1273, "step": 280 }, { "epoch": 0.1737567405632115, "grad_norm": 3.2093448638916016, "learning_rate": 4.7589267702239256e-05, "loss": 0.1354, "step": 290 }, { "epoch": 0.17974835230677053, "grad_norm": 3.5889291763305664, "learning_rate": 4.7488400242081905e-05, "loss": 0.1215, "step": 300 }, { "epoch": 0.18573996405032953, "grad_norm": 3.47147536277771, "learning_rate": 4.7387532781924554e-05, "loss": 0.1406, "step": 310 }, { "epoch": 0.19173157579388855, "grad_norm": 2.5742290019989014, "learning_rate": 4.7286665321767197e-05, "loss": 0.1316, "step": 320 }, { "epoch": 0.19772318753744758, "grad_norm": 2.653899908065796, "learning_rate": 4.7185797861609845e-05, "loss": 0.1333, "step": 330 }, { "epoch": 0.20371479928100658, "grad_norm": 3.630166530609131, "learning_rate": 4.7084930401452494e-05, "loss": 0.1312, "step": 340 }, { "epoch": 0.2097064110245656, "grad_norm": 2.3841392993927, "learning_rate": 4.698406294129514e-05, "loss": 0.1453, "step": 350 }, { "epoch": 0.21569802276812464, "grad_norm": 3.2746708393096924, "learning_rate": 4.6883195481137785e-05, "loss": 0.139, "step": 360 }, { "epoch": 0.22168963451168364, "grad_norm": 4.752730369567871, "learning_rate": 4.6782328020980434e-05, "loss": 0.1154, "step": 370 }, { "epoch": 0.22768124625524266, "grad_norm": 2.3728339672088623, "learning_rate": 4.668146056082308e-05, "loss": 0.1293, "step": 380 }, { "epoch": 0.23367285799880166, "grad_norm": 3.4194209575653076, "learning_rate": 4.6580593100665726e-05, "loss": 0.1378, "step": 390 }, { "epoch": 0.2396644697423607, "grad_norm": 5.167896747589111, "learning_rate": 4.6479725640508374e-05, "loss": 0.1268, "step": 400 }, { "epoch": 0.24565608148591972, "grad_norm": 4.707974910736084, "learning_rate": 4.6378858180351023e-05, "loss": 0.1439, "step": 410 }, { "epoch": 0.2516476932294787, "grad_norm": 3.6553096771240234, "learning_rate": 4.627799072019367e-05, "loss": 0.1482, "step": 420 }, { "epoch": 0.2576393049730378, "grad_norm": 2.85052227973938, "learning_rate": 4.6177123260036315e-05, "loss": 0.1349, "step": 430 }, { "epoch": 0.2636309167165968, "grad_norm": 3.3663010597229004, "learning_rate": 4.607625579987896e-05, "loss": 0.1311, "step": 440 }, { "epoch": 0.2696225284601558, "grad_norm": 4.309895992279053, "learning_rate": 4.5975388339721606e-05, "loss": 0.1204, "step": 450 }, { "epoch": 0.2756141402037148, "grad_norm": 4.174650192260742, "learning_rate": 4.5874520879564255e-05, "loss": 0.135, "step": 460 }, { "epoch": 0.28160575194727383, "grad_norm": 3.756552219390869, "learning_rate": 4.5773653419406903e-05, "loss": 0.164, "step": 470 }, { "epoch": 0.28759736369083283, "grad_norm": 3.187208890914917, "learning_rate": 4.5672785959249546e-05, "loss": 0.1262, "step": 480 }, { "epoch": 0.29358897543439183, "grad_norm": 4.308648586273193, "learning_rate": 4.5571918499092195e-05, "loss": 0.1408, "step": 490 }, { "epoch": 0.2995805871779509, "grad_norm": 4.406998634338379, "learning_rate": 4.5471051038934844e-05, "loss": 0.1486, "step": 500 }, { "epoch": 0.3055721989215099, "grad_norm": 2.7028679847717285, "learning_rate": 4.5370183578777486e-05, "loss": 0.1305, "step": 510 }, { "epoch": 0.3115638106650689, "grad_norm": 2.9602255821228027, "learning_rate": 4.5269316118620135e-05, "loss": 0.1294, "step": 520 }, { "epoch": 0.31755542240862794, "grad_norm": 4.902966499328613, "learning_rate": 4.5168448658462784e-05, "loss": 0.167, "step": 530 }, { "epoch": 0.32354703415218694, "grad_norm": 3.3013415336608887, "learning_rate": 4.506758119830543e-05, "loss": 0.1293, "step": 540 }, { "epoch": 0.32953864589574594, "grad_norm": 3.711364269256592, "learning_rate": 4.4966713738148075e-05, "loss": 0.1425, "step": 550 }, { "epoch": 0.335530257639305, "grad_norm": 3.011685609817505, "learning_rate": 4.486584627799072e-05, "loss": 0.1375, "step": 560 }, { "epoch": 0.341521869382864, "grad_norm": 3.6026058197021484, "learning_rate": 4.476497881783337e-05, "loss": 0.1368, "step": 570 }, { "epoch": 0.347513481126423, "grad_norm": 3.2295010089874268, "learning_rate": 4.4664111357676015e-05, "loss": 0.1294, "step": 580 }, { "epoch": 0.35350509286998205, "grad_norm": 4.814386367797852, "learning_rate": 4.4563243897518664e-05, "loss": 0.1609, "step": 590 }, { "epoch": 0.35949670461354105, "grad_norm": 2.2720932960510254, "learning_rate": 4.4462376437361306e-05, "loss": 0.1215, "step": 600 }, { "epoch": 0.36548831635710005, "grad_norm": 2.83774995803833, "learning_rate": 4.436150897720396e-05, "loss": 0.1352, "step": 610 }, { "epoch": 0.37147992810065905, "grad_norm": 2.27290678024292, "learning_rate": 4.4260641517046604e-05, "loss": 0.1314, "step": 620 }, { "epoch": 0.3774715398442181, "grad_norm": 4.785513401031494, "learning_rate": 4.4159774056889246e-05, "loss": 0.145, "step": 630 }, { "epoch": 0.3834631515877771, "grad_norm": 3.888967514038086, "learning_rate": 4.4058906596731895e-05, "loss": 0.1473, "step": 640 }, { "epoch": 0.3894547633313361, "grad_norm": 3.7604222297668457, "learning_rate": 4.3958039136574544e-05, "loss": 0.133, "step": 650 }, { "epoch": 0.39544637507489516, "grad_norm": 2.7649688720703125, "learning_rate": 4.385717167641719e-05, "loss": 0.1401, "step": 660 }, { "epoch": 0.40143798681845416, "grad_norm": 2.3196585178375244, "learning_rate": 4.3756304216259835e-05, "loss": 0.1399, "step": 670 }, { "epoch": 0.40742959856201316, "grad_norm": 2.82987904548645, "learning_rate": 4.3655436756102484e-05, "loss": 0.1439, "step": 680 }, { "epoch": 0.4134212103055722, "grad_norm": 2.5953242778778076, "learning_rate": 4.355456929594513e-05, "loss": 0.1324, "step": 690 }, { "epoch": 0.4194128220491312, "grad_norm": 2.913365125656128, "learning_rate": 4.3453701835787775e-05, "loss": 0.1546, "step": 700 }, { "epoch": 0.4254044337926902, "grad_norm": 2.669905424118042, "learning_rate": 4.3352834375630424e-05, "loss": 0.1304, "step": 710 }, { "epoch": 0.4313960455362493, "grad_norm": 3.4256887435913086, "learning_rate": 4.3251966915473066e-05, "loss": 0.1349, "step": 720 }, { "epoch": 0.4373876572798083, "grad_norm": 2.3781442642211914, "learning_rate": 4.315109945531572e-05, "loss": 0.1315, "step": 730 }, { "epoch": 0.4433792690233673, "grad_norm": 2.7064337730407715, "learning_rate": 4.3050231995158364e-05, "loss": 0.1586, "step": 740 }, { "epoch": 0.44937088076692633, "grad_norm": 2.1065878868103027, "learning_rate": 4.294936453500101e-05, "loss": 0.1331, "step": 750 }, { "epoch": 0.45536249251048533, "grad_norm": 2.7355570793151855, "learning_rate": 4.2848497074843655e-05, "loss": 0.1188, "step": 760 }, { "epoch": 0.46135410425404433, "grad_norm": 2.5642316341400146, "learning_rate": 4.2747629614686304e-05, "loss": 0.1161, "step": 770 }, { "epoch": 0.46734571599760333, "grad_norm": 4.905550003051758, "learning_rate": 4.264676215452895e-05, "loss": 0.1226, "step": 780 }, { "epoch": 0.4733373277411624, "grad_norm": 2.9115347862243652, "learning_rate": 4.2545894694371595e-05, "loss": 0.134, "step": 790 }, { "epoch": 0.4793289394847214, "grad_norm": 2.8118338584899902, "learning_rate": 4.2445027234214244e-05, "loss": 0.1397, "step": 800 }, { "epoch": 0.4853205512282804, "grad_norm": 3.0868048667907715, "learning_rate": 4.234415977405689e-05, "loss": 0.1349, "step": 810 }, { "epoch": 0.49131216297183944, "grad_norm": 2.3112714290618896, "learning_rate": 4.224329231389954e-05, "loss": 0.131, "step": 820 }, { "epoch": 0.49730377471539844, "grad_norm": 3.563694477081299, "learning_rate": 4.2142424853742184e-05, "loss": 0.1372, "step": 830 }, { "epoch": 0.5032953864589574, "grad_norm": 3.5067434310913086, "learning_rate": 4.2041557393584826e-05, "loss": 0.1379, "step": 840 }, { "epoch": 0.5092869982025164, "grad_norm": 3.358790397644043, "learning_rate": 4.194068993342748e-05, "loss": 0.1322, "step": 850 }, { "epoch": 0.5152786099460755, "grad_norm": 2.632565498352051, "learning_rate": 4.1839822473270124e-05, "loss": 0.1542, "step": 860 }, { "epoch": 0.5212702216896345, "grad_norm": 3.4644100666046143, "learning_rate": 4.173895501311277e-05, "loss": 0.1394, "step": 870 }, { "epoch": 0.5272618334331935, "grad_norm": 1.9788542985916138, "learning_rate": 4.1638087552955415e-05, "loss": 0.1466, "step": 880 }, { "epoch": 0.5332534451767525, "grad_norm": 3.182896852493286, "learning_rate": 4.1537220092798064e-05, "loss": 0.1325, "step": 890 }, { "epoch": 0.5392450569203115, "grad_norm": 2.9272377490997314, "learning_rate": 4.143635263264071e-05, "loss": 0.1547, "step": 900 }, { "epoch": 0.5452366686638705, "grad_norm": 3.800172805786133, "learning_rate": 4.1335485172483355e-05, "loss": 0.1459, "step": 910 }, { "epoch": 0.5512282804074295, "grad_norm": 3.8579702377319336, "learning_rate": 4.1234617712326004e-05, "loss": 0.1448, "step": 920 }, { "epoch": 0.5572198921509887, "grad_norm": 3.6494719982147217, "learning_rate": 4.113375025216865e-05, "loss": 0.13, "step": 930 }, { "epoch": 0.5632115038945477, "grad_norm": 3.322023391723633, "learning_rate": 4.10328827920113e-05, "loss": 0.1366, "step": 940 }, { "epoch": 0.5692031156381067, "grad_norm": 3.6455154418945312, "learning_rate": 4.0932015331853944e-05, "loss": 0.1409, "step": 950 }, { "epoch": 0.5751947273816657, "grad_norm": 2.7181620597839355, "learning_rate": 4.083114787169659e-05, "loss": 0.1229, "step": 960 }, { "epoch": 0.5811863391252247, "grad_norm": 2.1503639221191406, "learning_rate": 4.073028041153924e-05, "loss": 0.1163, "step": 970 }, { "epoch": 0.5871779508687837, "grad_norm": 4.750929832458496, "learning_rate": 4.0629412951381884e-05, "loss": 0.1422, "step": 980 }, { "epoch": 0.5931695626123428, "grad_norm": 2.9658565521240234, "learning_rate": 4.052854549122453e-05, "loss": 0.1121, "step": 990 }, { "epoch": 0.5991611743559018, "grad_norm": 3.2859907150268555, "learning_rate": 4.042767803106718e-05, "loss": 0.1594, "step": 1000 }, { "epoch": 0.5991611743559018, "eval_loss": 0.696466326713562, "eval_runtime": 745.6448, "eval_samples_per_second": 1.902, "eval_steps_per_second": 0.119, "eval_wer": 0.5007994974587402, "step": 1000 }, { "epoch": 0.6051527860994608, "grad_norm": 2.541327476501465, "learning_rate": 4.032681057090983e-05, "loss": 0.1066, "step": 1010 }, { "epoch": 0.6111443978430198, "grad_norm": 2.80812406539917, "learning_rate": 4.022594311075247e-05, "loss": 0.1276, "step": 1020 }, { "epoch": 0.6171360095865788, "grad_norm": 3.0700008869171143, "learning_rate": 4.0125075650595115e-05, "loss": 0.1332, "step": 1030 }, { "epoch": 0.6231276213301378, "grad_norm": 2.8067898750305176, "learning_rate": 4.0024208190437764e-05, "loss": 0.1253, "step": 1040 }, { "epoch": 0.6291192330736968, "grad_norm": 2.7025182247161865, "learning_rate": 3.992334073028041e-05, "loss": 0.1325, "step": 1050 }, { "epoch": 0.6351108448172559, "grad_norm": 3.257716178894043, "learning_rate": 3.982247327012306e-05, "loss": 0.1327, "step": 1060 }, { "epoch": 0.6411024565608149, "grad_norm": 3.26241135597229, "learning_rate": 3.9721605809965704e-05, "loss": 0.1629, "step": 1070 }, { "epoch": 0.6470940683043739, "grad_norm": 2.933842420578003, "learning_rate": 3.962073834980835e-05, "loss": 0.1399, "step": 1080 }, { "epoch": 0.6530856800479329, "grad_norm": 4.348404407501221, "learning_rate": 3.9519870889651e-05, "loss": 0.1429, "step": 1090 }, { "epoch": 0.6590772917914919, "grad_norm": 2.5708649158477783, "learning_rate": 3.9419003429493644e-05, "loss": 0.1335, "step": 1100 }, { "epoch": 0.6650689035350509, "grad_norm": 3.140418291091919, "learning_rate": 3.931813596933629e-05, "loss": 0.1475, "step": 1110 }, { "epoch": 0.67106051527861, "grad_norm": 3.433528423309326, "learning_rate": 3.921726850917894e-05, "loss": 0.1442, "step": 1120 }, { "epoch": 0.677052127022169, "grad_norm": 3.7102956771850586, "learning_rate": 3.911640104902159e-05, "loss": 0.1507, "step": 1130 }, { "epoch": 0.683043738765728, "grad_norm": 2.1036972999572754, "learning_rate": 3.901553358886423e-05, "loss": 0.1341, "step": 1140 }, { "epoch": 0.689035350509287, "grad_norm": 2.8285186290740967, "learning_rate": 3.891466612870688e-05, "loss": 0.1357, "step": 1150 }, { "epoch": 0.695026962252846, "grad_norm": 3.331232786178589, "learning_rate": 3.881379866854953e-05, "loss": 0.1437, "step": 1160 }, { "epoch": 0.701018573996405, "grad_norm": 2.9205992221832275, "learning_rate": 3.871293120839217e-05, "loss": 0.1365, "step": 1170 }, { "epoch": 0.7070101857399641, "grad_norm": 3.321892261505127, "learning_rate": 3.861206374823482e-05, "loss": 0.1468, "step": 1180 }, { "epoch": 0.7130017974835231, "grad_norm": 2.895930528640747, "learning_rate": 3.8511196288077464e-05, "loss": 0.1272, "step": 1190 }, { "epoch": 0.7189934092270821, "grad_norm": 5.071239471435547, "learning_rate": 3.8410328827920113e-05, "loss": 0.1467, "step": 1200 }, { "epoch": 0.7249850209706411, "grad_norm": 5.675464153289795, "learning_rate": 3.830946136776276e-05, "loss": 0.1455, "step": 1210 }, { "epoch": 0.7309766327142001, "grad_norm": 3.4301488399505615, "learning_rate": 3.8208593907605405e-05, "loss": 0.1495, "step": 1220 }, { "epoch": 0.7369682444577591, "grad_norm": 4.892243385314941, "learning_rate": 3.8107726447448053e-05, "loss": 0.1581, "step": 1230 }, { "epoch": 0.7429598562013181, "grad_norm": 2.309187173843384, "learning_rate": 3.80068589872907e-05, "loss": 0.1498, "step": 1240 }, { "epoch": 0.7489514679448772, "grad_norm": 4.048982620239258, "learning_rate": 3.790599152713335e-05, "loss": 0.1504, "step": 1250 }, { "epoch": 0.7549430796884362, "grad_norm": 3.6077523231506348, "learning_rate": 3.7805124066975993e-05, "loss": 0.1632, "step": 1260 }, { "epoch": 0.7609346914319952, "grad_norm": 1.95468270778656, "learning_rate": 3.770425660681864e-05, "loss": 0.1308, "step": 1270 }, { "epoch": 0.7669263031755542, "grad_norm": 3.681065082550049, "learning_rate": 3.760338914666129e-05, "loss": 0.1273, "step": 1280 }, { "epoch": 0.7729179149191132, "grad_norm": 2.304133653640747, "learning_rate": 3.7502521686503934e-05, "loss": 0.1383, "step": 1290 }, { "epoch": 0.7789095266626722, "grad_norm": 3.7350196838378906, "learning_rate": 3.740165422634658e-05, "loss": 0.122, "step": 1300 }, { "epoch": 0.7849011384062313, "grad_norm": 2.206613540649414, "learning_rate": 3.7300786766189225e-05, "loss": 0.1353, "step": 1310 }, { "epoch": 0.7908927501497903, "grad_norm": 3.012110710144043, "learning_rate": 3.719991930603188e-05, "loss": 0.139, "step": 1320 }, { "epoch": 0.7968843618933493, "grad_norm": 3.693063497543335, "learning_rate": 3.709905184587452e-05, "loss": 0.1544, "step": 1330 }, { "epoch": 0.8028759736369083, "grad_norm": 2.1389901638031006, "learning_rate": 3.699818438571717e-05, "loss": 0.1386, "step": 1340 }, { "epoch": 0.8088675853804673, "grad_norm": 2.6895837783813477, "learning_rate": 3.6897316925559814e-05, "loss": 0.1315, "step": 1350 }, { "epoch": 0.8148591971240263, "grad_norm": 2.8541972637176514, "learning_rate": 3.679644946540246e-05, "loss": 0.1326, "step": 1360 }, { "epoch": 0.8208508088675854, "grad_norm": 3.095381259918213, "learning_rate": 3.669558200524511e-05, "loss": 0.1374, "step": 1370 }, { "epoch": 0.8268424206111444, "grad_norm": 1.8898283243179321, "learning_rate": 3.6594714545087754e-05, "loss": 0.1336, "step": 1380 }, { "epoch": 0.8328340323547034, "grad_norm": 3.8323473930358887, "learning_rate": 3.64938470849304e-05, "loss": 0.1335, "step": 1390 }, { "epoch": 0.8388256440982624, "grad_norm": 2.4315149784088135, "learning_rate": 3.639297962477305e-05, "loss": 0.145, "step": 1400 }, { "epoch": 0.8448172558418214, "grad_norm": 1.6960047483444214, "learning_rate": 3.62921121646157e-05, "loss": 0.1326, "step": 1410 }, { "epoch": 0.8508088675853804, "grad_norm": 2.898869752883911, "learning_rate": 3.619124470445834e-05, "loss": 0.1263, "step": 1420 }, { "epoch": 0.8568004793289394, "grad_norm": 2.389853000640869, "learning_rate": 3.6090377244300985e-05, "loss": 0.1337, "step": 1430 }, { "epoch": 0.8627920910724985, "grad_norm": 2.521010398864746, "learning_rate": 3.598950978414364e-05, "loss": 0.1218, "step": 1440 }, { "epoch": 0.8687837028160575, "grad_norm": 3.900918960571289, "learning_rate": 3.588864232398628e-05, "loss": 0.1161, "step": 1450 }, { "epoch": 0.8747753145596165, "grad_norm": 3.4358227252960205, "learning_rate": 3.578777486382893e-05, "loss": 0.1311, "step": 1460 }, { "epoch": 0.8807669263031755, "grad_norm": 2.923558473587036, "learning_rate": 3.5686907403671574e-05, "loss": 0.1359, "step": 1470 }, { "epoch": 0.8867585380467345, "grad_norm": 2.5613369941711426, "learning_rate": 3.558603994351423e-05, "loss": 0.1126, "step": 1480 }, { "epoch": 0.8927501497902935, "grad_norm": 2.89349627494812, "learning_rate": 3.548517248335687e-05, "loss": 0.1473, "step": 1490 }, { "epoch": 0.8987417615338527, "grad_norm": 2.973292112350464, "learning_rate": 3.5384305023199514e-05, "loss": 0.1485, "step": 1500 }, { "epoch": 0.9047333732774117, "grad_norm": 2.166511058807373, "learning_rate": 3.528343756304216e-05, "loss": 0.1242, "step": 1510 }, { "epoch": 0.9107249850209707, "grad_norm": 2.6223630905151367, "learning_rate": 3.518257010288481e-05, "loss": 0.1255, "step": 1520 }, { "epoch": 0.9167165967645297, "grad_norm": 2.9496703147888184, "learning_rate": 3.508170264272746e-05, "loss": 0.1303, "step": 1530 }, { "epoch": 0.9227082085080887, "grad_norm": 3.1444201469421387, "learning_rate": 3.49808351825701e-05, "loss": 0.1549, "step": 1540 }, { "epoch": 0.9286998202516477, "grad_norm": 4.5898590087890625, "learning_rate": 3.487996772241275e-05, "loss": 0.1306, "step": 1550 }, { "epoch": 0.9346914319952067, "grad_norm": 2.1030378341674805, "learning_rate": 3.47791002622554e-05, "loss": 0.1185, "step": 1560 }, { "epoch": 0.9406830437387658, "grad_norm": 2.828355550765991, "learning_rate": 3.467823280209804e-05, "loss": 0.135, "step": 1570 }, { "epoch": 0.9466746554823248, "grad_norm": 1.8146847486495972, "learning_rate": 3.457736534194069e-05, "loss": 0.1157, "step": 1580 }, { "epoch": 0.9526662672258838, "grad_norm": 5.510678768157959, "learning_rate": 3.4476497881783334e-05, "loss": 0.1204, "step": 1590 }, { "epoch": 0.9586578789694428, "grad_norm": 3.572500467300415, "learning_rate": 3.437563042162599e-05, "loss": 0.1465, "step": 1600 }, { "epoch": 0.9646494907130018, "grad_norm": 2.8663079738616943, "learning_rate": 3.427476296146863e-05, "loss": 0.1101, "step": 1610 }, { "epoch": 0.9706411024565608, "grad_norm": 2.5090837478637695, "learning_rate": 3.4173895501311274e-05, "loss": 0.1127, "step": 1620 }, { "epoch": 0.9766327142001199, "grad_norm": 2.927410840988159, "learning_rate": 3.407302804115392e-05, "loss": 0.1496, "step": 1630 }, { "epoch": 0.9826243259436789, "grad_norm": 2.375161647796631, "learning_rate": 3.397216058099657e-05, "loss": 0.1243, "step": 1640 }, { "epoch": 0.9886159376872379, "grad_norm": 3.1424996852874756, "learning_rate": 3.387129312083922e-05, "loss": 0.1285, "step": 1650 }, { "epoch": 0.9946075494307969, "grad_norm": 2.785811185836792, "learning_rate": 3.377042566068186e-05, "loss": 0.1286, "step": 1660 }, { "epoch": 1.0005991611743559, "grad_norm": 2.26493763923645, "learning_rate": 3.366955820052451e-05, "loss": 0.1366, "step": 1670 }, { "epoch": 1.0065907729179149, "grad_norm": 1.9755520820617676, "learning_rate": 3.356869074036716e-05, "loss": 0.0766, "step": 1680 }, { "epoch": 1.0125823846614739, "grad_norm": 2.0320472717285156, "learning_rate": 3.34678232802098e-05, "loss": 0.0777, "step": 1690 }, { "epoch": 1.0185739964050329, "grad_norm": 2.279986619949341, "learning_rate": 3.336695582005245e-05, "loss": 0.0767, "step": 1700 }, { "epoch": 1.0245656081485919, "grad_norm": 1.7731980085372925, "learning_rate": 3.32660883598951e-05, "loss": 0.0922, "step": 1710 }, { "epoch": 1.030557219892151, "grad_norm": 2.6267566680908203, "learning_rate": 3.316522089973775e-05, "loss": 0.0822, "step": 1720 }, { "epoch": 1.03654883163571, "grad_norm": 2.804668426513672, "learning_rate": 3.306435343958039e-05, "loss": 0.0855, "step": 1730 }, { "epoch": 1.042540443379269, "grad_norm": 1.526654601097107, "learning_rate": 3.296348597942304e-05, "loss": 0.066, "step": 1740 }, { "epoch": 1.048532055122828, "grad_norm": 2.8683853149414062, "learning_rate": 3.286261851926569e-05, "loss": 0.0811, "step": 1750 }, { "epoch": 1.054523666866387, "grad_norm": 1.9529556035995483, "learning_rate": 3.276175105910833e-05, "loss": 0.0823, "step": 1760 }, { "epoch": 1.060515278609946, "grad_norm": 2.4281697273254395, "learning_rate": 3.266088359895098e-05, "loss": 0.0948, "step": 1770 }, { "epoch": 1.066506890353505, "grad_norm": 2.2370128631591797, "learning_rate": 3.256001613879362e-05, "loss": 0.0793, "step": 1780 }, { "epoch": 1.072498502097064, "grad_norm": 3.237933397293091, "learning_rate": 3.245914867863627e-05, "loss": 0.0889, "step": 1790 }, { "epoch": 1.078490113840623, "grad_norm": 2.237818479537964, "learning_rate": 3.235828121847892e-05, "loss": 0.0739, "step": 1800 }, { "epoch": 1.084481725584182, "grad_norm": 2.257967710494995, "learning_rate": 3.225741375832157e-05, "loss": 0.0847, "step": 1810 }, { "epoch": 1.090473337327741, "grad_norm": 2.68813419342041, "learning_rate": 3.215654629816421e-05, "loss": 0.0924, "step": 1820 }, { "epoch": 1.0964649490713, "grad_norm": 3.0873122215270996, "learning_rate": 3.205567883800686e-05, "loss": 0.0766, "step": 1830 }, { "epoch": 1.102456560814859, "grad_norm": 2.305025339126587, "learning_rate": 3.195481137784951e-05, "loss": 0.0799, "step": 1840 }, { "epoch": 1.1084481725584183, "grad_norm": 2.242445707321167, "learning_rate": 3.185394391769215e-05, "loss": 0.0928, "step": 1850 }, { "epoch": 1.1144397843019773, "grad_norm": 3.2711095809936523, "learning_rate": 3.17530764575348e-05, "loss": 0.0863, "step": 1860 }, { "epoch": 1.1204313960455363, "grad_norm": 2.1616060733795166, "learning_rate": 3.165220899737745e-05, "loss": 0.0939, "step": 1870 }, { "epoch": 1.1264230077890953, "grad_norm": 1.922755479812622, "learning_rate": 3.155134153722009e-05, "loss": 0.0864, "step": 1880 }, { "epoch": 1.1324146195326543, "grad_norm": 2.7162983417510986, "learning_rate": 3.145047407706274e-05, "loss": 0.0804, "step": 1890 }, { "epoch": 1.1384062312762133, "grad_norm": 1.7950539588928223, "learning_rate": 3.134960661690538e-05, "loss": 0.076, "step": 1900 }, { "epoch": 1.1443978430197723, "grad_norm": 1.889122724533081, "learning_rate": 3.124873915674804e-05, "loss": 0.0725, "step": 1910 }, { "epoch": 1.1503894547633313, "grad_norm": 1.9550236463546753, "learning_rate": 3.114787169659068e-05, "loss": 0.0781, "step": 1920 }, { "epoch": 1.1563810665068903, "grad_norm": 2.5608322620391846, "learning_rate": 3.104700423643333e-05, "loss": 0.0786, "step": 1930 }, { "epoch": 1.1623726782504493, "grad_norm": 2.287541389465332, "learning_rate": 3.094613677627597e-05, "loss": 0.0773, "step": 1940 }, { "epoch": 1.1683642899940083, "grad_norm": 2.689326286315918, "learning_rate": 3.084526931611862e-05, "loss": 0.0778, "step": 1950 }, { "epoch": 1.1743559017375673, "grad_norm": 2.6887409687042236, "learning_rate": 3.074440185596127e-05, "loss": 0.0734, "step": 1960 }, { "epoch": 1.1803475134811263, "grad_norm": 2.236410140991211, "learning_rate": 3.064353439580391e-05, "loss": 0.0638, "step": 1970 }, { "epoch": 1.1863391252246855, "grad_norm": 1.5748876333236694, "learning_rate": 3.054266693564656e-05, "loss": 0.0755, "step": 1980 }, { "epoch": 1.1923307369682445, "grad_norm": 2.5412073135375977, "learning_rate": 3.0441799475489207e-05, "loss": 0.0843, "step": 1990 }, { "epoch": 1.1983223487118035, "grad_norm": 2.379398822784424, "learning_rate": 3.034093201533186e-05, "loss": 0.0665, "step": 2000 }, { "epoch": 1.1983223487118035, "eval_loss": 0.7183927297592163, "eval_runtime": 684.2755, "eval_samples_per_second": 2.072, "eval_steps_per_second": 0.13, "eval_wer": 0.49623094055165323, "step": 2000 }, { "epoch": 1.2043139604553625, "grad_norm": 2.496299982070923, "learning_rate": 3.02400645551745e-05, "loss": 0.0914, "step": 2010 }, { "epoch": 1.2103055721989215, "grad_norm": 3.0780892372131348, "learning_rate": 3.0139197095017147e-05, "loss": 0.0851, "step": 2020 }, { "epoch": 1.2162971839424805, "grad_norm": 2.4244396686553955, "learning_rate": 3.0038329634859796e-05, "loss": 0.0668, "step": 2030 }, { "epoch": 1.2222887956860395, "grad_norm": 3.1019625663757324, "learning_rate": 2.993746217470244e-05, "loss": 0.0653, "step": 2040 }, { "epoch": 1.2282804074295985, "grad_norm": 2.498575448989868, "learning_rate": 2.983659471454509e-05, "loss": 0.0732, "step": 2050 }, { "epoch": 1.2342720191731575, "grad_norm": 2.5372304916381836, "learning_rate": 2.9735727254387736e-05, "loss": 0.0866, "step": 2060 }, { "epoch": 1.2402636309167165, "grad_norm": 1.5571708679199219, "learning_rate": 2.9634859794230385e-05, "loss": 0.0845, "step": 2070 }, { "epoch": 1.2462552426602755, "grad_norm": 2.367034912109375, "learning_rate": 2.953399233407303e-05, "loss": 0.0787, "step": 2080 }, { "epoch": 1.2522468544038348, "grad_norm": 3.7114803791046143, "learning_rate": 2.9433124873915672e-05, "loss": 0.0858, "step": 2090 }, { "epoch": 1.2582384661473935, "grad_norm": 2.6696841716766357, "learning_rate": 2.9332257413758325e-05, "loss": 0.0976, "step": 2100 }, { "epoch": 1.2642300778909528, "grad_norm": 3.041776418685913, "learning_rate": 2.9231389953600967e-05, "loss": 0.0658, "step": 2110 }, { "epoch": 1.2702216896345118, "grad_norm": 3.1914546489715576, "learning_rate": 2.913052249344362e-05, "loss": 0.0753, "step": 2120 }, { "epoch": 1.2762133013780708, "grad_norm": 1.524961233139038, "learning_rate": 2.902965503328626e-05, "loss": 0.0791, "step": 2130 }, { "epoch": 1.2822049131216298, "grad_norm": 2.4494245052337646, "learning_rate": 2.8928787573128914e-05, "loss": 0.0707, "step": 2140 }, { "epoch": 1.2881965248651888, "grad_norm": 3.0455808639526367, "learning_rate": 2.8827920112971556e-05, "loss": 0.0853, "step": 2150 }, { "epoch": 1.2941881366087478, "grad_norm": 2.0262930393218994, "learning_rate": 2.87270526528142e-05, "loss": 0.0704, "step": 2160 }, { "epoch": 1.3001797483523068, "grad_norm": 1.6224325895309448, "learning_rate": 2.862618519265685e-05, "loss": 0.077, "step": 2170 }, { "epoch": 1.3061713600958658, "grad_norm": 2.560068130493164, "learning_rate": 2.8525317732499496e-05, "loss": 0.0845, "step": 2180 }, { "epoch": 1.3121629718394248, "grad_norm": 2.7368597984313965, "learning_rate": 2.8424450272342145e-05, "loss": 0.0777, "step": 2190 }, { "epoch": 1.3181545835829838, "grad_norm": 2.2715647220611572, "learning_rate": 2.832358281218479e-05, "loss": 0.0748, "step": 2200 }, { "epoch": 1.3241461953265428, "grad_norm": 2.4632680416107178, "learning_rate": 2.8222715352027436e-05, "loss": 0.0874, "step": 2210 }, { "epoch": 1.330137807070102, "grad_norm": 2.501645088195801, "learning_rate": 2.8121847891870085e-05, "loss": 0.0774, "step": 2220 }, { "epoch": 1.3361294188136608, "grad_norm": 1.52916419506073, "learning_rate": 2.802098043171273e-05, "loss": 0.0685, "step": 2230 }, { "epoch": 1.34212103055722, "grad_norm": 1.6257095336914062, "learning_rate": 2.792011297155538e-05, "loss": 0.0667, "step": 2240 }, { "epoch": 1.348112642300779, "grad_norm": 2.855592966079712, "learning_rate": 2.781924551139802e-05, "loss": 0.0832, "step": 2250 }, { "epoch": 1.354104254044338, "grad_norm": 2.8703014850616455, "learning_rate": 2.7718378051240674e-05, "loss": 0.068, "step": 2260 }, { "epoch": 1.360095865787897, "grad_norm": 2.3611278533935547, "learning_rate": 2.7617510591083316e-05, "loss": 0.0751, "step": 2270 }, { "epoch": 1.366087477531456, "grad_norm": 1.7880631685256958, "learning_rate": 2.751664313092596e-05, "loss": 0.0724, "step": 2280 }, { "epoch": 1.372079089275015, "grad_norm": 1.9004284143447876, "learning_rate": 2.741577567076861e-05, "loss": 0.0909, "step": 2290 }, { "epoch": 1.378070701018574, "grad_norm": 2.208883047103882, "learning_rate": 2.7314908210611256e-05, "loss": 0.0671, "step": 2300 }, { "epoch": 1.384062312762133, "grad_norm": 1.8540887832641602, "learning_rate": 2.7214040750453905e-05, "loss": 0.0861, "step": 2310 }, { "epoch": 1.390053924505692, "grad_norm": 2.8379740715026855, "learning_rate": 2.711317329029655e-05, "loss": 0.0714, "step": 2320 }, { "epoch": 1.396045536249251, "grad_norm": 2.2230288982391357, "learning_rate": 2.70123058301392e-05, "loss": 0.077, "step": 2330 }, { "epoch": 1.40203714799281, "grad_norm": 3.1027259826660156, "learning_rate": 2.6911438369981845e-05, "loss": 0.0796, "step": 2340 }, { "epoch": 1.4080287597363692, "grad_norm": 2.489982843399048, "learning_rate": 2.681057090982449e-05, "loss": 0.0727, "step": 2350 }, { "epoch": 1.414020371479928, "grad_norm": 2.224640130996704, "learning_rate": 2.670970344966714e-05, "loss": 0.0839, "step": 2360 }, { "epoch": 1.4200119832234872, "grad_norm": 2.718700647354126, "learning_rate": 2.6608835989509785e-05, "loss": 0.075, "step": 2370 }, { "epoch": 1.4260035949670462, "grad_norm": 2.6818034648895264, "learning_rate": 2.6507968529352434e-05, "loss": 0.0734, "step": 2380 }, { "epoch": 1.4319952067106052, "grad_norm": 1.8535293340682983, "learning_rate": 2.640710106919508e-05, "loss": 0.076, "step": 2390 }, { "epoch": 1.4379868184541642, "grad_norm": 2.8081820011138916, "learning_rate": 2.630623360903773e-05, "loss": 0.0707, "step": 2400 }, { "epoch": 1.4439784301977232, "grad_norm": 2.0238046646118164, "learning_rate": 2.6205366148880374e-05, "loss": 0.0765, "step": 2410 }, { "epoch": 1.4499700419412822, "grad_norm": 2.9945600032806396, "learning_rate": 2.6104498688723016e-05, "loss": 0.0907, "step": 2420 }, { "epoch": 1.4559616536848412, "grad_norm": 3.8100576400756836, "learning_rate": 2.6003631228565665e-05, "loss": 0.0873, "step": 2430 }, { "epoch": 1.4619532654284002, "grad_norm": 1.8465749025344849, "learning_rate": 2.590276376840831e-05, "loss": 0.0646, "step": 2440 }, { "epoch": 1.4679448771719592, "grad_norm": 2.084381103515625, "learning_rate": 2.580189630825096e-05, "loss": 0.0697, "step": 2450 }, { "epoch": 1.4739364889155182, "grad_norm": 2.089414358139038, "learning_rate": 2.5701028848093605e-05, "loss": 0.0697, "step": 2460 }, { "epoch": 1.4799281006590772, "grad_norm": 2.39705491065979, "learning_rate": 2.5600161387936254e-05, "loss": 0.0822, "step": 2470 }, { "epoch": 1.4859197124026364, "grad_norm": 2.2949612140655518, "learning_rate": 2.54992939277789e-05, "loss": 0.0883, "step": 2480 }, { "epoch": 1.4919113241461952, "grad_norm": 1.9815038442611694, "learning_rate": 2.5398426467621545e-05, "loss": 0.0712, "step": 2490 }, { "epoch": 1.4979029358897544, "grad_norm": 2.18231201171875, "learning_rate": 2.5297559007464194e-05, "loss": 0.09, "step": 2500 }, { "epoch": 1.5038945476333132, "grad_norm": 2.602478504180908, "learning_rate": 2.519669154730684e-05, "loss": 0.0837, "step": 2510 }, { "epoch": 1.5098861593768724, "grad_norm": 1.8262310028076172, "learning_rate": 2.509582408714949e-05, "loss": 0.0812, "step": 2520 }, { "epoch": 1.5158777711204314, "grad_norm": 2.6024467945098877, "learning_rate": 2.4994956626992134e-05, "loss": 0.0747, "step": 2530 }, { "epoch": 1.5218693828639904, "grad_norm": 2.4434356689453125, "learning_rate": 2.489408916683478e-05, "loss": 0.0883, "step": 2540 }, { "epoch": 1.5278609946075494, "grad_norm": 2.055715322494507, "learning_rate": 2.479322170667743e-05, "loss": 0.0709, "step": 2550 }, { "epoch": 1.5338526063511084, "grad_norm": 2.179154634475708, "learning_rate": 2.4692354246520074e-05, "loss": 0.084, "step": 2560 }, { "epoch": 1.5398442180946674, "grad_norm": 3.096709728240967, "learning_rate": 2.4591486786362723e-05, "loss": 0.0851, "step": 2570 }, { "epoch": 1.5458358298382264, "grad_norm": 2.3316915035247803, "learning_rate": 2.4490619326205365e-05, "loss": 0.0834, "step": 2580 }, { "epoch": 1.5518274415817856, "grad_norm": 1.9087728261947632, "learning_rate": 2.4389751866048014e-05, "loss": 0.0761, "step": 2590 }, { "epoch": 1.5578190533253444, "grad_norm": 2.3828837871551514, "learning_rate": 2.428888440589066e-05, "loss": 0.0888, "step": 2600 }, { "epoch": 1.5638106650689036, "grad_norm": 2.2623255252838135, "learning_rate": 2.418801694573331e-05, "loss": 0.0905, "step": 2610 }, { "epoch": 1.5698022768124624, "grad_norm": 2.371645212173462, "learning_rate": 2.4087149485575954e-05, "loss": 0.0674, "step": 2620 }, { "epoch": 1.5757938885560216, "grad_norm": 3.3547451496124268, "learning_rate": 2.3986282025418603e-05, "loss": 0.0785, "step": 2630 }, { "epoch": 1.5817855002995804, "grad_norm": 1.9994959831237793, "learning_rate": 2.3885414565261246e-05, "loss": 0.071, "step": 2640 }, { "epoch": 1.5877771120431396, "grad_norm": 2.294867992401123, "learning_rate": 2.3784547105103894e-05, "loss": 0.0733, "step": 2650 }, { "epoch": 1.5937687237866986, "grad_norm": 1.9338445663452148, "learning_rate": 2.368367964494654e-05, "loss": 0.0827, "step": 2660 }, { "epoch": 1.5997603355302576, "grad_norm": 2.6823794841766357, "learning_rate": 2.358281218478919e-05, "loss": 0.0698, "step": 2670 }, { "epoch": 1.6057519472738166, "grad_norm": 3.147641181945801, "learning_rate": 2.3481944724631835e-05, "loss": 0.0884, "step": 2680 }, { "epoch": 1.6117435590173756, "grad_norm": 1.8961377143859863, "learning_rate": 2.3381077264474483e-05, "loss": 0.0711, "step": 2690 }, { "epoch": 1.6177351707609346, "grad_norm": 3.8997535705566406, "learning_rate": 2.328020980431713e-05, "loss": 0.0672, "step": 2700 }, { "epoch": 1.6237267825044936, "grad_norm": 2.460813283920288, "learning_rate": 2.3179342344159775e-05, "loss": 0.0849, "step": 2710 }, { "epoch": 1.6297183942480529, "grad_norm": 2.1751198768615723, "learning_rate": 2.307847488400242e-05, "loss": 0.0921, "step": 2720 }, { "epoch": 1.6357100059916116, "grad_norm": 2.415151596069336, "learning_rate": 2.297760742384507e-05, "loss": 0.0747, "step": 2730 }, { "epoch": 1.6417016177351709, "grad_norm": 2.236166477203369, "learning_rate": 2.2876739963687715e-05, "loss": 0.0777, "step": 2740 }, { "epoch": 1.6476932294787296, "grad_norm": 1.8858668804168701, "learning_rate": 2.2775872503530364e-05, "loss": 0.0641, "step": 2750 }, { "epoch": 1.6536848412222889, "grad_norm": 3.2240684032440186, "learning_rate": 2.267500504337301e-05, "loss": 0.0656, "step": 2760 }, { "epoch": 1.6596764529658476, "grad_norm": 2.5699257850646973, "learning_rate": 2.2574137583215658e-05, "loss": 0.0718, "step": 2770 }, { "epoch": 1.6656680647094069, "grad_norm": 2.1655499935150146, "learning_rate": 2.24732701230583e-05, "loss": 0.0737, "step": 2780 }, { "epoch": 1.6716596764529659, "grad_norm": 2.6293349266052246, "learning_rate": 2.237240266290095e-05, "loss": 0.0742, "step": 2790 }, { "epoch": 1.6776512881965249, "grad_norm": 1.8740370273590088, "learning_rate": 2.2271535202743595e-05, "loss": 0.0577, "step": 2800 }, { "epoch": 1.6836428999400839, "grad_norm": 2.1657564640045166, "learning_rate": 2.2170667742586244e-05, "loss": 0.0703, "step": 2810 }, { "epoch": 1.6896345116836429, "grad_norm": 2.1452126502990723, "learning_rate": 2.206980028242889e-05, "loss": 0.0838, "step": 2820 }, { "epoch": 1.6956261234272019, "grad_norm": 2.4039194583892822, "learning_rate": 2.1968932822271538e-05, "loss": 0.092, "step": 2830 }, { "epoch": 1.7016177351707609, "grad_norm": 2.0987935066223145, "learning_rate": 2.186806536211418e-05, "loss": 0.0625, "step": 2840 }, { "epoch": 1.70760934691432, "grad_norm": 1.980102777481079, "learning_rate": 2.176719790195683e-05, "loss": 0.0812, "step": 2850 }, { "epoch": 1.7136009586578789, "grad_norm": 3.052550792694092, "learning_rate": 2.1666330441799475e-05, "loss": 0.069, "step": 2860 }, { "epoch": 1.719592570401438, "grad_norm": 2.373227834701538, "learning_rate": 2.1565462981642124e-05, "loss": 0.072, "step": 2870 }, { "epoch": 1.7255841821449969, "grad_norm": 3.045281410217285, "learning_rate": 2.146459552148477e-05, "loss": 0.0702, "step": 2880 }, { "epoch": 1.731575793888556, "grad_norm": 2.5577785968780518, "learning_rate": 2.1363728061327418e-05, "loss": 0.0785, "step": 2890 }, { "epoch": 1.737567405632115, "grad_norm": 2.3629047870635986, "learning_rate": 2.1262860601170064e-05, "loss": 0.0754, "step": 2900 }, { "epoch": 1.743559017375674, "grad_norm": 2.157334089279175, "learning_rate": 2.116199314101271e-05, "loss": 0.0644, "step": 2910 }, { "epoch": 1.749550629119233, "grad_norm": 2.86433482170105, "learning_rate": 2.1061125680855355e-05, "loss": 0.0834, "step": 2920 }, { "epoch": 1.755542240862792, "grad_norm": 3.452363967895508, "learning_rate": 2.0960258220698004e-05, "loss": 0.0682, "step": 2930 }, { "epoch": 1.761533852606351, "grad_norm": 1.9637304544448853, "learning_rate": 2.085939076054065e-05, "loss": 0.0663, "step": 2940 }, { "epoch": 1.76752546434991, "grad_norm": 2.607135772705078, "learning_rate": 2.0758523300383298e-05, "loss": 0.0865, "step": 2950 }, { "epoch": 1.773517076093469, "grad_norm": 3.233673095703125, "learning_rate": 2.0657655840225944e-05, "loss": 0.07, "step": 2960 }, { "epoch": 1.779508687837028, "grad_norm": 1.5085541009902954, "learning_rate": 2.055678838006859e-05, "loss": 0.0676, "step": 2970 }, { "epoch": 1.7855002995805873, "grad_norm": 2.5728871822357178, "learning_rate": 2.045592091991124e-05, "loss": 0.0674, "step": 2980 }, { "epoch": 1.791491911324146, "grad_norm": 2.0894975662231445, "learning_rate": 2.0355053459753884e-05, "loss": 0.0681, "step": 2990 }, { "epoch": 1.7974835230677053, "grad_norm": 3.2060320377349854, "learning_rate": 2.025418599959653e-05, "loss": 0.0767, "step": 3000 }, { "epoch": 1.7974835230677053, "eval_loss": 0.7448650002479553, "eval_runtime": 676.6864, "eval_samples_per_second": 2.096, "eval_steps_per_second": 0.132, "eval_wer": 0.460510536234367, "step": 3000 }, { "epoch": 1.803475134811264, "grad_norm": 1.9069411754608154, "learning_rate": 2.015331853943918e-05, "loss": 0.0594, "step": 3010 }, { "epoch": 1.8094667465548233, "grad_norm": 3.2389609813690186, "learning_rate": 2.0052451079281824e-05, "loss": 0.0869, "step": 3020 }, { "epoch": 1.8154583582983823, "grad_norm": 2.8738017082214355, "learning_rate": 1.9951583619124473e-05, "loss": 0.068, "step": 3030 }, { "epoch": 1.8214499700419413, "grad_norm": 2.5264780521392822, "learning_rate": 1.985071615896712e-05, "loss": 0.0745, "step": 3040 }, { "epoch": 1.8274415817855003, "grad_norm": 1.7072393894195557, "learning_rate": 1.9749848698809764e-05, "loss": 0.0732, "step": 3050 }, { "epoch": 1.8334331935290593, "grad_norm": 4.048574447631836, "learning_rate": 1.9648981238652413e-05, "loss": 0.0676, "step": 3060 }, { "epoch": 1.8394248052726183, "grad_norm": 2.7415385246276855, "learning_rate": 1.954811377849506e-05, "loss": 0.0658, "step": 3070 }, { "epoch": 1.8454164170161773, "grad_norm": 1.9306994676589966, "learning_rate": 1.9447246318337707e-05, "loss": 0.0841, "step": 3080 }, { "epoch": 1.8514080287597365, "grad_norm": 2.8990273475646973, "learning_rate": 1.9346378858180353e-05, "loss": 0.0743, "step": 3090 }, { "epoch": 1.8573996405032953, "grad_norm": 2.0166501998901367, "learning_rate": 1.9245511398023e-05, "loss": 0.079, "step": 3100 }, { "epoch": 1.8633912522468545, "grad_norm": 2.3217945098876953, "learning_rate": 1.9144643937865644e-05, "loss": 0.0645, "step": 3110 }, { "epoch": 1.8693828639904133, "grad_norm": 2.787743330001831, "learning_rate": 1.9043776477708293e-05, "loss": 0.0614, "step": 3120 }, { "epoch": 1.8753744757339725, "grad_norm": 1.7940049171447754, "learning_rate": 1.894290901755094e-05, "loss": 0.0607, "step": 3130 }, { "epoch": 1.8813660874775313, "grad_norm": 2.0448403358459473, "learning_rate": 1.8842041557393587e-05, "loss": 0.0784, "step": 3140 }, { "epoch": 1.8873576992210905, "grad_norm": 2.265869379043579, "learning_rate": 1.8741174097236233e-05, "loss": 0.0661, "step": 3150 }, { "epoch": 1.8933493109646495, "grad_norm": 1.9905105829238892, "learning_rate": 1.8640306637078882e-05, "loss": 0.0667, "step": 3160 }, { "epoch": 1.8993409227082085, "grad_norm": 1.5427712202072144, "learning_rate": 1.8539439176921524e-05, "loss": 0.0662, "step": 3170 }, { "epoch": 1.9053325344517675, "grad_norm": 1.6274867057800293, "learning_rate": 1.8438571716764173e-05, "loss": 0.0657, "step": 3180 }, { "epoch": 1.9113241461953265, "grad_norm": 1.5048216581344604, "learning_rate": 1.833770425660682e-05, "loss": 0.0741, "step": 3190 }, { "epoch": 1.9173157579388855, "grad_norm": 2.2717835903167725, "learning_rate": 1.8236836796449468e-05, "loss": 0.0601, "step": 3200 }, { "epoch": 1.9233073696824445, "grad_norm": 2.6843059062957764, "learning_rate": 1.8135969336292113e-05, "loss": 0.0697, "step": 3210 }, { "epoch": 1.9292989814260038, "grad_norm": 1.437799334526062, "learning_rate": 1.8035101876134762e-05, "loss": 0.0758, "step": 3220 }, { "epoch": 1.9352905931695625, "grad_norm": 2.8273918628692627, "learning_rate": 1.7934234415977408e-05, "loss": 0.0792, "step": 3230 }, { "epoch": 1.9412822049131218, "grad_norm": 2.177579641342163, "learning_rate": 1.7833366955820053e-05, "loss": 0.0659, "step": 3240 }, { "epoch": 1.9472738166566805, "grad_norm": 1.6548727750778198, "learning_rate": 1.77324994956627e-05, "loss": 0.064, "step": 3250 }, { "epoch": 1.9532654284002398, "grad_norm": 2.316882371902466, "learning_rate": 1.7631632035505348e-05, "loss": 0.0601, "step": 3260 }, { "epoch": 1.9592570401437985, "grad_norm": 2.911669969558716, "learning_rate": 1.7530764575347993e-05, "loss": 0.058, "step": 3270 }, { "epoch": 1.9652486518873578, "grad_norm": 2.2127685546875, "learning_rate": 1.7429897115190642e-05, "loss": 0.0745, "step": 3280 }, { "epoch": 1.9712402636309168, "grad_norm": 1.9832196235656738, "learning_rate": 1.7329029655033288e-05, "loss": 0.0723, "step": 3290 }, { "epoch": 1.9772318753744758, "grad_norm": 2.8020925521850586, "learning_rate": 1.7228162194875933e-05, "loss": 0.0676, "step": 3300 }, { "epoch": 1.9832234871180348, "grad_norm": 3.036353349685669, "learning_rate": 1.712729473471858e-05, "loss": 0.0776, "step": 3310 }, { "epoch": 1.9892150988615938, "grad_norm": 1.0084120035171509, "learning_rate": 1.7026427274561228e-05, "loss": 0.0559, "step": 3320 }, { "epoch": 1.9952067106051528, "grad_norm": 3.1513946056365967, "learning_rate": 1.6925559814403873e-05, "loss": 0.0773, "step": 3330 }, { "epoch": 2.0011983223487118, "grad_norm": 1.6514886617660522, "learning_rate": 1.6824692354246522e-05, "loss": 0.0555, "step": 3340 }, { "epoch": 2.007189934092271, "grad_norm": 1.4943495988845825, "learning_rate": 1.6723824894089168e-05, "loss": 0.0401, "step": 3350 }, { "epoch": 2.0131815458358298, "grad_norm": 2.2410435676574707, "learning_rate": 1.6622957433931817e-05, "loss": 0.0395, "step": 3360 }, { "epoch": 2.019173157579389, "grad_norm": 1.5972563028335571, "learning_rate": 1.652208997377446e-05, "loss": 0.0365, "step": 3370 }, { "epoch": 2.0251647693229478, "grad_norm": 1.4300589561462402, "learning_rate": 1.6421222513617108e-05, "loss": 0.0361, "step": 3380 }, { "epoch": 2.031156381066507, "grad_norm": 1.1573065519332886, "learning_rate": 1.6320355053459753e-05, "loss": 0.0401, "step": 3390 }, { "epoch": 2.0371479928100658, "grad_norm": 1.3025338649749756, "learning_rate": 1.6219487593302402e-05, "loss": 0.0295, "step": 3400 }, { "epoch": 2.043139604553625, "grad_norm": 2.5381758213043213, "learning_rate": 1.6118620133145048e-05, "loss": 0.0371, "step": 3410 }, { "epoch": 2.0491312162971838, "grad_norm": 2.166651487350464, "learning_rate": 1.6017752672987697e-05, "loss": 0.0335, "step": 3420 }, { "epoch": 2.055122828040743, "grad_norm": 1.688333511352539, "learning_rate": 1.5916885212830342e-05, "loss": 0.0399, "step": 3430 }, { "epoch": 2.061114439784302, "grad_norm": 2.373098611831665, "learning_rate": 1.5816017752672988e-05, "loss": 0.036, "step": 3440 }, { "epoch": 2.067106051527861, "grad_norm": 2.6142423152923584, "learning_rate": 1.5715150292515633e-05, "loss": 0.0377, "step": 3450 }, { "epoch": 2.07309766327142, "grad_norm": 1.7786281108856201, "learning_rate": 1.5614282832358282e-05, "loss": 0.0365, "step": 3460 }, { "epoch": 2.079089275014979, "grad_norm": 1.6953134536743164, "learning_rate": 1.5513415372200928e-05, "loss": 0.0401, "step": 3470 }, { "epoch": 2.085080886758538, "grad_norm": 2.5490822792053223, "learning_rate": 1.5412547912043577e-05, "loss": 0.0363, "step": 3480 }, { "epoch": 2.091072498502097, "grad_norm": 1.8173693418502808, "learning_rate": 1.5311680451886222e-05, "loss": 0.0425, "step": 3490 }, { "epoch": 2.097064110245656, "grad_norm": 2.4072582721710205, "learning_rate": 1.5210812991728868e-05, "loss": 0.0313, "step": 3500 }, { "epoch": 2.103055721989215, "grad_norm": 1.7706407308578491, "learning_rate": 1.5109945531571515e-05, "loss": 0.0436, "step": 3510 }, { "epoch": 2.109047333732774, "grad_norm": 1.4706944227218628, "learning_rate": 1.5009078071414162e-05, "loss": 0.0355, "step": 3520 }, { "epoch": 2.115038945476333, "grad_norm": 1.9873573780059814, "learning_rate": 1.490821061125681e-05, "loss": 0.0436, "step": 3530 }, { "epoch": 2.121030557219892, "grad_norm": 3.540144920349121, "learning_rate": 1.4807343151099457e-05, "loss": 0.0403, "step": 3540 }, { "epoch": 2.127022168963451, "grad_norm": 1.8939119577407837, "learning_rate": 1.4706475690942104e-05, "loss": 0.035, "step": 3550 }, { "epoch": 2.13301378070701, "grad_norm": 1.3639899492263794, "learning_rate": 1.4605608230784751e-05, "loss": 0.0402, "step": 3560 }, { "epoch": 2.139005392450569, "grad_norm": 1.7946410179138184, "learning_rate": 1.4504740770627395e-05, "loss": 0.0292, "step": 3570 }, { "epoch": 2.144997004194128, "grad_norm": 2.206691265106201, "learning_rate": 1.4403873310470043e-05, "loss": 0.0381, "step": 3580 }, { "epoch": 2.1509886159376874, "grad_norm": 1.7045485973358154, "learning_rate": 1.430300585031269e-05, "loss": 0.0325, "step": 3590 }, { "epoch": 2.156980227681246, "grad_norm": 1.582770586013794, "learning_rate": 1.4202138390155337e-05, "loss": 0.0286, "step": 3600 }, { "epoch": 2.1629718394248054, "grad_norm": 2.6559298038482666, "learning_rate": 1.4101270929997984e-05, "loss": 0.0388, "step": 3610 }, { "epoch": 2.168963451168364, "grad_norm": 2.094505786895752, "learning_rate": 1.4000403469840632e-05, "loss": 0.0347, "step": 3620 }, { "epoch": 2.1749550629119234, "grad_norm": 1.6940979957580566, "learning_rate": 1.3899536009683275e-05, "loss": 0.0472, "step": 3630 }, { "epoch": 2.180946674655482, "grad_norm": 2.0886759757995605, "learning_rate": 1.3798668549525923e-05, "loss": 0.0333, "step": 3640 }, { "epoch": 2.1869382863990414, "grad_norm": 1.226136565208435, "learning_rate": 1.369780108936857e-05, "loss": 0.0301, "step": 3650 }, { "epoch": 2.1929298981426, "grad_norm": 1.9858462810516357, "learning_rate": 1.3596933629211217e-05, "loss": 0.0357, "step": 3660 }, { "epoch": 2.1989215098861594, "grad_norm": 1.2248085737228394, "learning_rate": 1.3496066169053864e-05, "loss": 0.0282, "step": 3670 }, { "epoch": 2.204913121629718, "grad_norm": 1.8398869037628174, "learning_rate": 1.3395198708896512e-05, "loss": 0.0376, "step": 3680 }, { "epoch": 2.2109047333732774, "grad_norm": 2.07497501373291, "learning_rate": 1.3294331248739159e-05, "loss": 0.0435, "step": 3690 }, { "epoch": 2.2168963451168366, "grad_norm": 1.5605896711349487, "learning_rate": 1.3193463788581803e-05, "loss": 0.0502, "step": 3700 }, { "epoch": 2.2228879568603954, "grad_norm": 1.877122402191162, "learning_rate": 1.309259632842445e-05, "loss": 0.0326, "step": 3710 }, { "epoch": 2.2288795686039546, "grad_norm": 1.2292389869689941, "learning_rate": 1.2991728868267097e-05, "loss": 0.0413, "step": 3720 }, { "epoch": 2.2348711803475134, "grad_norm": 3.2773098945617676, "learning_rate": 1.2890861408109744e-05, "loss": 0.0345, "step": 3730 }, { "epoch": 2.2408627920910726, "grad_norm": 1.8774651288986206, "learning_rate": 1.2789993947952392e-05, "loss": 0.0361, "step": 3740 }, { "epoch": 2.2468544038346314, "grad_norm": 2.3334178924560547, "learning_rate": 1.2689126487795039e-05, "loss": 0.0275, "step": 3750 }, { "epoch": 2.2528460155781906, "grad_norm": 2.0709402561187744, "learning_rate": 1.2588259027637686e-05, "loss": 0.0474, "step": 3760 }, { "epoch": 2.2588376273217494, "grad_norm": 1.9124011993408203, "learning_rate": 1.2487391567480332e-05, "loss": 0.0331, "step": 3770 }, { "epoch": 2.2648292390653086, "grad_norm": 1.548407793045044, "learning_rate": 1.2386524107322977e-05, "loss": 0.0325, "step": 3780 }, { "epoch": 2.2708208508088674, "grad_norm": 1.7963712215423584, "learning_rate": 1.2285656647165625e-05, "loss": 0.0388, "step": 3790 }, { "epoch": 2.2768124625524266, "grad_norm": 1.190773606300354, "learning_rate": 1.2184789187008272e-05, "loss": 0.034, "step": 3800 }, { "epoch": 2.282804074295986, "grad_norm": 2.5754892826080322, "learning_rate": 1.2083921726850919e-05, "loss": 0.0351, "step": 3810 }, { "epoch": 2.2887956860395446, "grad_norm": 1.9364176988601685, "learning_rate": 1.1983054266693565e-05, "loss": 0.0353, "step": 3820 }, { "epoch": 2.2947872977831034, "grad_norm": 2.3457119464874268, "learning_rate": 1.1882186806536212e-05, "loss": 0.0322, "step": 3830 }, { "epoch": 2.3007789095266626, "grad_norm": 2.6304166316986084, "learning_rate": 1.1781319346378859e-05, "loss": 0.0396, "step": 3840 }, { "epoch": 2.306770521270222, "grad_norm": 2.0976033210754395, "learning_rate": 1.1680451886221505e-05, "loss": 0.0313, "step": 3850 }, { "epoch": 2.3127621330137806, "grad_norm": 1.447740912437439, "learning_rate": 1.1579584426064152e-05, "loss": 0.0307, "step": 3860 }, { "epoch": 2.31875374475734, "grad_norm": 1.0516040325164795, "learning_rate": 1.1478716965906799e-05, "loss": 0.0298, "step": 3870 }, { "epoch": 2.3247453565008986, "grad_norm": 1.3127169609069824, "learning_rate": 1.1377849505749445e-05, "loss": 0.0368, "step": 3880 }, { "epoch": 2.330736968244458, "grad_norm": 1.0407856702804565, "learning_rate": 1.1276982045592092e-05, "loss": 0.0379, "step": 3890 }, { "epoch": 2.3367285799880166, "grad_norm": 2.1066038608551025, "learning_rate": 1.1176114585434739e-05, "loss": 0.0388, "step": 3900 }, { "epoch": 2.342720191731576, "grad_norm": 2.169619560241699, "learning_rate": 1.1075247125277385e-05, "loss": 0.0339, "step": 3910 }, { "epoch": 2.3487118034751346, "grad_norm": 1.432060956954956, "learning_rate": 1.0974379665120032e-05, "loss": 0.0393, "step": 3920 }, { "epoch": 2.354703415218694, "grad_norm": 2.5926220417022705, "learning_rate": 1.087351220496268e-05, "loss": 0.042, "step": 3930 }, { "epoch": 2.3606950269622526, "grad_norm": 1.5448261499404907, "learning_rate": 1.0772644744805326e-05, "loss": 0.0364, "step": 3940 }, { "epoch": 2.366686638705812, "grad_norm": 1.302937388420105, "learning_rate": 1.0671777284647972e-05, "loss": 0.0408, "step": 3950 }, { "epoch": 2.372678250449371, "grad_norm": 1.1434581279754639, "learning_rate": 1.057090982449062e-05, "loss": 0.0334, "step": 3960 }, { "epoch": 2.37866986219293, "grad_norm": 1.498061180114746, "learning_rate": 1.0470042364333266e-05, "loss": 0.0356, "step": 3970 }, { "epoch": 2.384661473936489, "grad_norm": 2.3065295219421387, "learning_rate": 1.0369174904175914e-05, "loss": 0.0395, "step": 3980 }, { "epoch": 2.390653085680048, "grad_norm": 2.0692338943481445, "learning_rate": 1.026830744401856e-05, "loss": 0.0313, "step": 3990 }, { "epoch": 2.396644697423607, "grad_norm": 2.2149441242218018, "learning_rate": 1.0167439983861206e-05, "loss": 0.0357, "step": 4000 }, { "epoch": 2.396644697423607, "eval_loss": 0.7519157528877258, "eval_runtime": 689.0466, "eval_samples_per_second": 2.058, "eval_steps_per_second": 0.129, "eval_wer": 0.45348637998972074, "step": 4000 }, { "epoch": 2.402636309167166, "grad_norm": 1.8555347919464111, "learning_rate": 1.0066572523703854e-05, "loss": 0.0276, "step": 4010 }, { "epoch": 2.408627920910725, "grad_norm": 0.9930511116981506, "learning_rate": 9.965705063546501e-06, "loss": 0.0313, "step": 4020 }, { "epoch": 2.414619532654284, "grad_norm": 1.6992213726043701, "learning_rate": 9.864837603389147e-06, "loss": 0.0314, "step": 4030 }, { "epoch": 2.420611144397843, "grad_norm": 1.9412932395935059, "learning_rate": 9.763970143231794e-06, "loss": 0.0328, "step": 4040 }, { "epoch": 2.426602756141402, "grad_norm": 2.8163645267486572, "learning_rate": 9.663102683074441e-06, "loss": 0.0401, "step": 4050 }, { "epoch": 2.432594367884961, "grad_norm": 2.6094470024108887, "learning_rate": 9.562235222917088e-06, "loss": 0.034, "step": 4060 }, { "epoch": 2.4385859796285203, "grad_norm": 1.896148443222046, "learning_rate": 9.461367762759736e-06, "loss": 0.0333, "step": 4070 }, { "epoch": 2.444577591372079, "grad_norm": 2.1546242237091064, "learning_rate": 9.360500302602381e-06, "loss": 0.0388, "step": 4080 }, { "epoch": 2.450569203115638, "grad_norm": 1.583756446838379, "learning_rate": 9.259632842445028e-06, "loss": 0.0279, "step": 4090 }, { "epoch": 2.456560814859197, "grad_norm": 1.8957688808441162, "learning_rate": 9.158765382287676e-06, "loss": 0.0347, "step": 4100 }, { "epoch": 2.4625524266027563, "grad_norm": 1.2544060945510864, "learning_rate": 9.057897922130321e-06, "loss": 0.0384, "step": 4110 }, { "epoch": 2.468544038346315, "grad_norm": 1.9352102279663086, "learning_rate": 8.957030461972968e-06, "loss": 0.0422, "step": 4120 }, { "epoch": 2.4745356500898743, "grad_norm": 1.348984956741333, "learning_rate": 8.856163001815616e-06, "loss": 0.029, "step": 4130 }, { "epoch": 2.480527261833433, "grad_norm": 1.6167041063308716, "learning_rate": 8.755295541658263e-06, "loss": 0.0311, "step": 4140 }, { "epoch": 2.4865188735769923, "grad_norm": 2.604578971862793, "learning_rate": 8.654428081500908e-06, "loss": 0.0362, "step": 4150 }, { "epoch": 2.492510485320551, "grad_norm": 1.353280782699585, "learning_rate": 8.553560621343556e-06, "loss": 0.0323, "step": 4160 }, { "epoch": 2.4985020970641103, "grad_norm": 1.6819239854812622, "learning_rate": 8.452693161186203e-06, "loss": 0.0356, "step": 4170 }, { "epoch": 2.5044937088076695, "grad_norm": 1.5543423891067505, "learning_rate": 8.351825701028848e-06, "loss": 0.0283, "step": 4180 }, { "epoch": 2.5104853205512283, "grad_norm": 1.8067575693130493, "learning_rate": 8.250958240871496e-06, "loss": 0.0251, "step": 4190 }, { "epoch": 2.516476932294787, "grad_norm": 1.6408029794692993, "learning_rate": 8.150090780714143e-06, "loss": 0.032, "step": 4200 }, { "epoch": 2.5224685440383463, "grad_norm": 2.8149192333221436, "learning_rate": 8.049223320556788e-06, "loss": 0.0356, "step": 4210 }, { "epoch": 2.5284601557819055, "grad_norm": 2.3816680908203125, "learning_rate": 7.948355860399436e-06, "loss": 0.0508, "step": 4220 }, { "epoch": 2.5344517675254643, "grad_norm": 0.8424966335296631, "learning_rate": 7.847488400242083e-06, "loss": 0.0319, "step": 4230 }, { "epoch": 2.5404433792690235, "grad_norm": 2.407496213912964, "learning_rate": 7.746620940084729e-06, "loss": 0.0437, "step": 4240 }, { "epoch": 2.5464349910125823, "grad_norm": 1.2954119443893433, "learning_rate": 7.645753479927376e-06, "loss": 0.0294, "step": 4250 }, { "epoch": 2.5524266027561415, "grad_norm": 2.2885632514953613, "learning_rate": 7.544886019770023e-06, "loss": 0.0293, "step": 4260 }, { "epoch": 2.5584182144997003, "grad_norm": 1.5569554567337036, "learning_rate": 7.44401855961267e-06, "loss": 0.0306, "step": 4270 }, { "epoch": 2.5644098262432595, "grad_norm": 1.3652706146240234, "learning_rate": 7.343151099455316e-06, "loss": 0.0364, "step": 4280 }, { "epoch": 2.5704014379868183, "grad_norm": 1.461018443107605, "learning_rate": 7.242283639297963e-06, "loss": 0.0321, "step": 4290 }, { "epoch": 2.5763930497303775, "grad_norm": 1.4211386442184448, "learning_rate": 7.14141617914061e-06, "loss": 0.0278, "step": 4300 }, { "epoch": 2.5823846614739363, "grad_norm": 1.122140645980835, "learning_rate": 7.040548718983256e-06, "loss": 0.0257, "step": 4310 }, { "epoch": 2.5883762732174955, "grad_norm": 2.3963217735290527, "learning_rate": 6.939681258825903e-06, "loss": 0.0406, "step": 4320 }, { "epoch": 2.5943678849610547, "grad_norm": 2.619145154953003, "learning_rate": 6.83881379866855e-06, "loss": 0.0356, "step": 4330 }, { "epoch": 2.6003594967046135, "grad_norm": 2.836984872817993, "learning_rate": 6.737946338511196e-06, "loss": 0.0332, "step": 4340 }, { "epoch": 2.6063511084481723, "grad_norm": 1.085598349571228, "learning_rate": 6.637078878353843e-06, "loss": 0.0326, "step": 4350 }, { "epoch": 2.6123427201917315, "grad_norm": 1.6264371871948242, "learning_rate": 6.53621141819649e-06, "loss": 0.034, "step": 4360 }, { "epoch": 2.6183343319352907, "grad_norm": 2.668785333633423, "learning_rate": 6.435343958039138e-06, "loss": 0.0346, "step": 4370 }, { "epoch": 2.6243259436788495, "grad_norm": 1.0972654819488525, "learning_rate": 6.3445632438975195e-06, "loss": 0.0277, "step": 4380 }, { "epoch": 2.6303175554224087, "grad_norm": 2.0440616607666016, "learning_rate": 6.243695783740166e-06, "loss": 0.0345, "step": 4390 }, { "epoch": 2.6363091671659675, "grad_norm": 1.2406848669052124, "learning_rate": 6.142828323582812e-06, "loss": 0.0255, "step": 4400 }, { "epoch": 2.6423007789095267, "grad_norm": 2.209808111190796, "learning_rate": 6.0419608634254595e-06, "loss": 0.0344, "step": 4410 }, { "epoch": 2.6482923906530855, "grad_norm": 1.178346037864685, "learning_rate": 5.941093403268106e-06, "loss": 0.034, "step": 4420 }, { "epoch": 2.6542840023966447, "grad_norm": 1.8238011598587036, "learning_rate": 5.840225943110752e-06, "loss": 0.0334, "step": 4430 }, { "epoch": 2.660275614140204, "grad_norm": 1.1327911615371704, "learning_rate": 5.7393584829533995e-06, "loss": 0.0332, "step": 4440 }, { "epoch": 2.6662672258837627, "grad_norm": 1.3287112712860107, "learning_rate": 5.638491022796046e-06, "loss": 0.0341, "step": 4450 }, { "epoch": 2.6722588376273215, "grad_norm": 1.4079091548919678, "learning_rate": 5.537623562638692e-06, "loss": 0.0295, "step": 4460 }, { "epoch": 2.6782504493708807, "grad_norm": 1.7478376626968384, "learning_rate": 5.43675610248134e-06, "loss": 0.0289, "step": 4470 }, { "epoch": 2.68424206111444, "grad_norm": 1.264457106590271, "learning_rate": 5.335888642323986e-06, "loss": 0.0269, "step": 4480 }, { "epoch": 2.6902336728579987, "grad_norm": 1.1999608278274536, "learning_rate": 5.235021182166633e-06, "loss": 0.0341, "step": 4490 }, { "epoch": 2.696225284601558, "grad_norm": 2.0491466522216797, "learning_rate": 5.13415372200928e-06, "loss": 0.0383, "step": 4500 }, { "epoch": 2.7022168963451167, "grad_norm": 1.9697667360305786, "learning_rate": 5.033286261851927e-06, "loss": 0.0347, "step": 4510 }, { "epoch": 2.708208508088676, "grad_norm": 1.3339002132415771, "learning_rate": 4.932418801694573e-06, "loss": 0.0339, "step": 4520 }, { "epoch": 2.7142001198322347, "grad_norm": 2.346050500869751, "learning_rate": 4.8315513415372205e-06, "loss": 0.0317, "step": 4530 }, { "epoch": 2.720191731575794, "grad_norm": 1.9801725149154663, "learning_rate": 4.730683881379868e-06, "loss": 0.034, "step": 4540 }, { "epoch": 2.7261833433193527, "grad_norm": 1.5184906721115112, "learning_rate": 4.629816421222514e-06, "loss": 0.0294, "step": 4550 }, { "epoch": 2.732174955062912, "grad_norm": 1.131451964378357, "learning_rate": 4.5289489610651606e-06, "loss": 0.0345, "step": 4560 }, { "epoch": 2.7381665668064707, "grad_norm": 1.6129716634750366, "learning_rate": 4.428081500907808e-06, "loss": 0.0307, "step": 4570 }, { "epoch": 2.74415817855003, "grad_norm": 1.9320335388183594, "learning_rate": 4.327214040750454e-06, "loss": 0.03, "step": 4580 }, { "epoch": 2.750149790293589, "grad_norm": 1.6482346057891846, "learning_rate": 4.2263465805931014e-06, "loss": 0.0293, "step": 4590 }, { "epoch": 2.756141402037148, "grad_norm": 2.033869504928589, "learning_rate": 4.125479120435748e-06, "loss": 0.0157, "step": 4600 }, { "epoch": 2.7621330137807067, "grad_norm": 1.0993421077728271, "learning_rate": 4.024611660278394e-06, "loss": 0.027, "step": 4610 }, { "epoch": 2.768124625524266, "grad_norm": 1.4427032470703125, "learning_rate": 3.9237442001210415e-06, "loss": 0.0315, "step": 4620 }, { "epoch": 2.774116237267825, "grad_norm": 1.7813125848770142, "learning_rate": 3.822876739963688e-06, "loss": 0.0311, "step": 4630 }, { "epoch": 2.780107849011384, "grad_norm": 1.579520583152771, "learning_rate": 3.722009279806335e-06, "loss": 0.0254, "step": 4640 }, { "epoch": 2.786099460754943, "grad_norm": 1.8535341024398804, "learning_rate": 3.6211418196489815e-06, "loss": 0.0379, "step": 4650 }, { "epoch": 2.792091072498502, "grad_norm": 1.1260581016540527, "learning_rate": 3.520274359491628e-06, "loss": 0.0292, "step": 4660 }, { "epoch": 2.798082684242061, "grad_norm": 1.3263887166976929, "learning_rate": 3.419406899334275e-06, "loss": 0.0332, "step": 4670 }, { "epoch": 2.80407429598562, "grad_norm": 1.6725775003433228, "learning_rate": 3.3185394391769216e-06, "loss": 0.0324, "step": 4680 }, { "epoch": 2.810065907729179, "grad_norm": 1.6184289455413818, "learning_rate": 3.217671979019569e-06, "loss": 0.0201, "step": 4690 }, { "epoch": 2.8160575194727384, "grad_norm": 1.532372236251831, "learning_rate": 3.116804518862215e-06, "loss": 0.0311, "step": 4700 }, { "epoch": 2.822049131216297, "grad_norm": 1.2122362852096558, "learning_rate": 3.015937058704862e-06, "loss": 0.0264, "step": 4710 }, { "epoch": 2.828040742959856, "grad_norm": 2.1291024684906006, "learning_rate": 2.9150695985475084e-06, "loss": 0.0279, "step": 4720 }, { "epoch": 2.834032354703415, "grad_norm": 1.5369389057159424, "learning_rate": 2.8142021383901552e-06, "loss": 0.0311, "step": 4730 }, { "epoch": 2.8400239664469744, "grad_norm": 1.5334845781326294, "learning_rate": 2.713334678232802e-06, "loss": 0.032, "step": 4740 }, { "epoch": 2.846015578190533, "grad_norm": 0.8564344048500061, "learning_rate": 2.612467218075449e-06, "loss": 0.0242, "step": 4750 }, { "epoch": 2.8520071899340924, "grad_norm": 1.7857556343078613, "learning_rate": 2.5115997579180957e-06, "loss": 0.0398, "step": 4760 }, { "epoch": 2.857998801677651, "grad_norm": 1.5189533233642578, "learning_rate": 2.4107322977607425e-06, "loss": 0.0376, "step": 4770 }, { "epoch": 2.8639904134212104, "grad_norm": 1.8716094493865967, "learning_rate": 2.3098648376033894e-06, "loss": 0.022, "step": 4780 }, { "epoch": 2.869982025164769, "grad_norm": 1.2873154878616333, "learning_rate": 2.208997377446036e-06, "loss": 0.0252, "step": 4790 }, { "epoch": 2.8759736369083284, "grad_norm": 1.3582782745361328, "learning_rate": 2.108129917288683e-06, "loss": 0.0255, "step": 4800 }, { "epoch": 2.8819652486518876, "grad_norm": 1.480502724647522, "learning_rate": 2.00726245713133e-06, "loss": 0.038, "step": 4810 }, { "epoch": 2.8879568603954464, "grad_norm": 2.472153902053833, "learning_rate": 1.9063949969739762e-06, "loss": 0.04, "step": 4820 }, { "epoch": 2.893948472139005, "grad_norm": 2.1943459510803223, "learning_rate": 1.805527536816623e-06, "loss": 0.029, "step": 4830 }, { "epoch": 2.8999400838825644, "grad_norm": 0.9275830388069153, "learning_rate": 1.7046600766592699e-06, "loss": 0.0283, "step": 4840 }, { "epoch": 2.9059316956261236, "grad_norm": 1.082454800605774, "learning_rate": 1.6037926165019167e-06, "loss": 0.0301, "step": 4850 }, { "epoch": 2.9119233073696824, "grad_norm": 1.5443400144577026, "learning_rate": 1.5029251563445633e-06, "loss": 0.0257, "step": 4860 }, { "epoch": 2.917914919113241, "grad_norm": 2.1504111289978027, "learning_rate": 1.40205769618721e-06, "loss": 0.0294, "step": 4870 }, { "epoch": 2.9239065308568004, "grad_norm": 2.696934223175049, "learning_rate": 1.3011902360298567e-06, "loss": 0.0324, "step": 4880 }, { "epoch": 2.9298981426003596, "grad_norm": 1.6266438961029053, "learning_rate": 1.2003227758725035e-06, "loss": 0.0253, "step": 4890 }, { "epoch": 2.9358897543439184, "grad_norm": 1.217948317527771, "learning_rate": 1.0994553157151504e-06, "loss": 0.0351, "step": 4900 }, { "epoch": 2.9418813660874776, "grad_norm": 1.121309518814087, "learning_rate": 9.985878555577972e-07, "loss": 0.028, "step": 4910 }, { "epoch": 2.9478729778310364, "grad_norm": 2.2428746223449707, "learning_rate": 8.977203954004438e-07, "loss": 0.0375, "step": 4920 }, { "epoch": 2.9538645895745956, "grad_norm": 1.1667921543121338, "learning_rate": 7.968529352430906e-07, "loss": 0.0207, "step": 4930 }, { "epoch": 2.9598562013181544, "grad_norm": 1.7099730968475342, "learning_rate": 6.959854750857373e-07, "loss": 0.0331, "step": 4940 }, { "epoch": 2.9658478130617136, "grad_norm": 1.3251785039901733, "learning_rate": 5.951180149283842e-07, "loss": 0.0305, "step": 4950 }, { "epoch": 2.971839424805273, "grad_norm": 0.9592264890670776, "learning_rate": 4.942505547710309e-07, "loss": 0.0225, "step": 4960 }, { "epoch": 2.9778310365488316, "grad_norm": 1.1208076477050781, "learning_rate": 3.933830946136777e-07, "loss": 0.0253, "step": 4970 }, { "epoch": 2.9838226482923904, "grad_norm": 2.31288743019104, "learning_rate": 2.925156344563244e-07, "loss": 0.0379, "step": 4980 }, { "epoch": 2.9898142600359496, "grad_norm": 1.670334815979004, "learning_rate": 1.9164817429897115e-07, "loss": 0.0353, "step": 4990 }, { "epoch": 2.995805871779509, "grad_norm": 1.1539229154586792, "learning_rate": 9.078071414161792e-08, "loss": 0.0363, "step": 5000 }, { "epoch": 2.995805871779509, "eval_loss": 0.7503044009208679, "eval_runtime": 686.3834, "eval_samples_per_second": 2.066, "eval_steps_per_second": 0.13, "eval_wer": 0.44463480098223973, "step": 5000 } ], "logging_steps": 10, "max_steps": 5007, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.1887996928e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }