Whisper-Base-Khmer-V4 / trainer_state.json
Vira21's picture
Upload 16 files
ad97b05 verified
{
"best_metric": 0.44463480098223973,
"best_model_checkpoint": "d:\\DataTicon\\Whisper-Khmer-Small\\whisper-khmer\\outputs\\whisper-base-khmer\\checkpoint-5000",
"epoch": 2.995805871779509,
"eval_steps": 1000,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.005991611743559017,
"grad_norm": 2.331707239151001,
"learning_rate": 1e-05,
"loss": 0.0943,
"step": 10
},
{
"epoch": 0.011983223487118035,
"grad_norm": 1.523736596107483,
"learning_rate": 2e-05,
"loss": 0.0746,
"step": 20
},
{
"epoch": 0.017974835230677052,
"grad_norm": 3.2414867877960205,
"learning_rate": 3e-05,
"loss": 0.0876,
"step": 30
},
{
"epoch": 0.02396644697423607,
"grad_norm": 3.187774419784546,
"learning_rate": 4e-05,
"loss": 0.094,
"step": 40
},
{
"epoch": 0.029958058717795086,
"grad_norm": 2.1969003677368164,
"learning_rate": 5e-05,
"loss": 0.0975,
"step": 50
},
{
"epoch": 0.035949670461354104,
"grad_norm": 3.237760543823242,
"learning_rate": 4.9899132539842645e-05,
"loss": 0.1178,
"step": 60
},
{
"epoch": 0.041941282204913125,
"grad_norm": 4.322758197784424,
"learning_rate": 4.9798265079685293e-05,
"loss": 0.1447,
"step": 70
},
{
"epoch": 0.04793289394847214,
"grad_norm": 3.5539541244506836,
"learning_rate": 4.9707484365543676e-05,
"loss": 0.1321,
"step": 80
},
{
"epoch": 0.05392450569203116,
"grad_norm": 2.4829070568084717,
"learning_rate": 4.9606616905386325e-05,
"loss": 0.1137,
"step": 90
},
{
"epoch": 0.05991611743559017,
"grad_norm": 3.877744197845459,
"learning_rate": 4.9505749445228974e-05,
"loss": 0.1218,
"step": 100
},
{
"epoch": 0.0659077291791492,
"grad_norm": 3.387650728225708,
"learning_rate": 4.9404881985071616e-05,
"loss": 0.1253,
"step": 110
},
{
"epoch": 0.07189934092270821,
"grad_norm": 2.919637441635132,
"learning_rate": 4.9304014524914265e-05,
"loss": 0.1181,
"step": 120
},
{
"epoch": 0.07789095266626722,
"grad_norm": 3.4895029067993164,
"learning_rate": 4.920314706475691e-05,
"loss": 0.1111,
"step": 130
},
{
"epoch": 0.08388256440982625,
"grad_norm": 4.521254062652588,
"learning_rate": 4.910227960459956e-05,
"loss": 0.1314,
"step": 140
},
{
"epoch": 0.08987417615338526,
"grad_norm": 3.0622236728668213,
"learning_rate": 4.9001412144442205e-05,
"loss": 0.1555,
"step": 150
},
{
"epoch": 0.09586578789694428,
"grad_norm": 4.06223726272583,
"learning_rate": 4.8900544684284854e-05,
"loss": 0.1388,
"step": 160
},
{
"epoch": 0.10185739964050329,
"grad_norm": 3.4601869583129883,
"learning_rate": 4.8799677224127496e-05,
"loss": 0.1491,
"step": 170
},
{
"epoch": 0.10784901138406232,
"grad_norm": 5.160483360290527,
"learning_rate": 4.8698809763970145e-05,
"loss": 0.1523,
"step": 180
},
{
"epoch": 0.11384062312762133,
"grad_norm": 4.109564781188965,
"learning_rate": 4.8597942303812794e-05,
"loss": 0.1391,
"step": 190
},
{
"epoch": 0.11983223487118035,
"grad_norm": 2.8160319328308105,
"learning_rate": 4.8497074843655436e-05,
"loss": 0.1132,
"step": 200
},
{
"epoch": 0.12582384661473936,
"grad_norm": 3.797750473022461,
"learning_rate": 4.8396207383498085e-05,
"loss": 0.1243,
"step": 210
},
{
"epoch": 0.1318154583582984,
"grad_norm": 3.033773422241211,
"learning_rate": 4.8295339923340734e-05,
"loss": 0.1565,
"step": 220
},
{
"epoch": 0.1378070701018574,
"grad_norm": 4.846673011779785,
"learning_rate": 4.8194472463183376e-05,
"loss": 0.1666,
"step": 230
},
{
"epoch": 0.14379868184541642,
"grad_norm": 2.80188250541687,
"learning_rate": 4.8093605003026025e-05,
"loss": 0.1502,
"step": 240
},
{
"epoch": 0.14979029358897544,
"grad_norm": 3.6848764419555664,
"learning_rate": 4.7992737542868674e-05,
"loss": 0.1459,
"step": 250
},
{
"epoch": 0.15578190533253444,
"grad_norm": 2.692138195037842,
"learning_rate": 4.789187008271132e-05,
"loss": 0.1204,
"step": 260
},
{
"epoch": 0.16177351707609347,
"grad_norm": 3.547896385192871,
"learning_rate": 4.7791002622553965e-05,
"loss": 0.1556,
"step": 270
},
{
"epoch": 0.1677651288196525,
"grad_norm": 3.1380774974823,
"learning_rate": 4.7690135162396614e-05,
"loss": 0.1273,
"step": 280
},
{
"epoch": 0.1737567405632115,
"grad_norm": 3.2093448638916016,
"learning_rate": 4.7589267702239256e-05,
"loss": 0.1354,
"step": 290
},
{
"epoch": 0.17974835230677053,
"grad_norm": 3.5889291763305664,
"learning_rate": 4.7488400242081905e-05,
"loss": 0.1215,
"step": 300
},
{
"epoch": 0.18573996405032953,
"grad_norm": 3.47147536277771,
"learning_rate": 4.7387532781924554e-05,
"loss": 0.1406,
"step": 310
},
{
"epoch": 0.19173157579388855,
"grad_norm": 2.5742290019989014,
"learning_rate": 4.7286665321767197e-05,
"loss": 0.1316,
"step": 320
},
{
"epoch": 0.19772318753744758,
"grad_norm": 2.653899908065796,
"learning_rate": 4.7185797861609845e-05,
"loss": 0.1333,
"step": 330
},
{
"epoch": 0.20371479928100658,
"grad_norm": 3.630166530609131,
"learning_rate": 4.7084930401452494e-05,
"loss": 0.1312,
"step": 340
},
{
"epoch": 0.2097064110245656,
"grad_norm": 2.3841392993927,
"learning_rate": 4.698406294129514e-05,
"loss": 0.1453,
"step": 350
},
{
"epoch": 0.21569802276812464,
"grad_norm": 3.2746708393096924,
"learning_rate": 4.6883195481137785e-05,
"loss": 0.139,
"step": 360
},
{
"epoch": 0.22168963451168364,
"grad_norm": 4.752730369567871,
"learning_rate": 4.6782328020980434e-05,
"loss": 0.1154,
"step": 370
},
{
"epoch": 0.22768124625524266,
"grad_norm": 2.3728339672088623,
"learning_rate": 4.668146056082308e-05,
"loss": 0.1293,
"step": 380
},
{
"epoch": 0.23367285799880166,
"grad_norm": 3.4194209575653076,
"learning_rate": 4.6580593100665726e-05,
"loss": 0.1378,
"step": 390
},
{
"epoch": 0.2396644697423607,
"grad_norm": 5.167896747589111,
"learning_rate": 4.6479725640508374e-05,
"loss": 0.1268,
"step": 400
},
{
"epoch": 0.24565608148591972,
"grad_norm": 4.707974910736084,
"learning_rate": 4.6378858180351023e-05,
"loss": 0.1439,
"step": 410
},
{
"epoch": 0.2516476932294787,
"grad_norm": 3.6553096771240234,
"learning_rate": 4.627799072019367e-05,
"loss": 0.1482,
"step": 420
},
{
"epoch": 0.2576393049730378,
"grad_norm": 2.85052227973938,
"learning_rate": 4.6177123260036315e-05,
"loss": 0.1349,
"step": 430
},
{
"epoch": 0.2636309167165968,
"grad_norm": 3.3663010597229004,
"learning_rate": 4.607625579987896e-05,
"loss": 0.1311,
"step": 440
},
{
"epoch": 0.2696225284601558,
"grad_norm": 4.309895992279053,
"learning_rate": 4.5975388339721606e-05,
"loss": 0.1204,
"step": 450
},
{
"epoch": 0.2756141402037148,
"grad_norm": 4.174650192260742,
"learning_rate": 4.5874520879564255e-05,
"loss": 0.135,
"step": 460
},
{
"epoch": 0.28160575194727383,
"grad_norm": 3.756552219390869,
"learning_rate": 4.5773653419406903e-05,
"loss": 0.164,
"step": 470
},
{
"epoch": 0.28759736369083283,
"grad_norm": 3.187208890914917,
"learning_rate": 4.5672785959249546e-05,
"loss": 0.1262,
"step": 480
},
{
"epoch": 0.29358897543439183,
"grad_norm": 4.308648586273193,
"learning_rate": 4.5571918499092195e-05,
"loss": 0.1408,
"step": 490
},
{
"epoch": 0.2995805871779509,
"grad_norm": 4.406998634338379,
"learning_rate": 4.5471051038934844e-05,
"loss": 0.1486,
"step": 500
},
{
"epoch": 0.3055721989215099,
"grad_norm": 2.7028679847717285,
"learning_rate": 4.5370183578777486e-05,
"loss": 0.1305,
"step": 510
},
{
"epoch": 0.3115638106650689,
"grad_norm": 2.9602255821228027,
"learning_rate": 4.5269316118620135e-05,
"loss": 0.1294,
"step": 520
},
{
"epoch": 0.31755542240862794,
"grad_norm": 4.902966499328613,
"learning_rate": 4.5168448658462784e-05,
"loss": 0.167,
"step": 530
},
{
"epoch": 0.32354703415218694,
"grad_norm": 3.3013415336608887,
"learning_rate": 4.506758119830543e-05,
"loss": 0.1293,
"step": 540
},
{
"epoch": 0.32953864589574594,
"grad_norm": 3.711364269256592,
"learning_rate": 4.4966713738148075e-05,
"loss": 0.1425,
"step": 550
},
{
"epoch": 0.335530257639305,
"grad_norm": 3.011685609817505,
"learning_rate": 4.486584627799072e-05,
"loss": 0.1375,
"step": 560
},
{
"epoch": 0.341521869382864,
"grad_norm": 3.6026058197021484,
"learning_rate": 4.476497881783337e-05,
"loss": 0.1368,
"step": 570
},
{
"epoch": 0.347513481126423,
"grad_norm": 3.2295010089874268,
"learning_rate": 4.4664111357676015e-05,
"loss": 0.1294,
"step": 580
},
{
"epoch": 0.35350509286998205,
"grad_norm": 4.814386367797852,
"learning_rate": 4.4563243897518664e-05,
"loss": 0.1609,
"step": 590
},
{
"epoch": 0.35949670461354105,
"grad_norm": 2.2720932960510254,
"learning_rate": 4.4462376437361306e-05,
"loss": 0.1215,
"step": 600
},
{
"epoch": 0.36548831635710005,
"grad_norm": 2.83774995803833,
"learning_rate": 4.436150897720396e-05,
"loss": 0.1352,
"step": 610
},
{
"epoch": 0.37147992810065905,
"grad_norm": 2.27290678024292,
"learning_rate": 4.4260641517046604e-05,
"loss": 0.1314,
"step": 620
},
{
"epoch": 0.3774715398442181,
"grad_norm": 4.785513401031494,
"learning_rate": 4.4159774056889246e-05,
"loss": 0.145,
"step": 630
},
{
"epoch": 0.3834631515877771,
"grad_norm": 3.888967514038086,
"learning_rate": 4.4058906596731895e-05,
"loss": 0.1473,
"step": 640
},
{
"epoch": 0.3894547633313361,
"grad_norm": 3.7604222297668457,
"learning_rate": 4.3958039136574544e-05,
"loss": 0.133,
"step": 650
},
{
"epoch": 0.39544637507489516,
"grad_norm": 2.7649688720703125,
"learning_rate": 4.385717167641719e-05,
"loss": 0.1401,
"step": 660
},
{
"epoch": 0.40143798681845416,
"grad_norm": 2.3196585178375244,
"learning_rate": 4.3756304216259835e-05,
"loss": 0.1399,
"step": 670
},
{
"epoch": 0.40742959856201316,
"grad_norm": 2.82987904548645,
"learning_rate": 4.3655436756102484e-05,
"loss": 0.1439,
"step": 680
},
{
"epoch": 0.4134212103055722,
"grad_norm": 2.5953242778778076,
"learning_rate": 4.355456929594513e-05,
"loss": 0.1324,
"step": 690
},
{
"epoch": 0.4194128220491312,
"grad_norm": 2.913365125656128,
"learning_rate": 4.3453701835787775e-05,
"loss": 0.1546,
"step": 700
},
{
"epoch": 0.4254044337926902,
"grad_norm": 2.669905424118042,
"learning_rate": 4.3352834375630424e-05,
"loss": 0.1304,
"step": 710
},
{
"epoch": 0.4313960455362493,
"grad_norm": 3.4256887435913086,
"learning_rate": 4.3251966915473066e-05,
"loss": 0.1349,
"step": 720
},
{
"epoch": 0.4373876572798083,
"grad_norm": 2.3781442642211914,
"learning_rate": 4.315109945531572e-05,
"loss": 0.1315,
"step": 730
},
{
"epoch": 0.4433792690233673,
"grad_norm": 2.7064337730407715,
"learning_rate": 4.3050231995158364e-05,
"loss": 0.1586,
"step": 740
},
{
"epoch": 0.44937088076692633,
"grad_norm": 2.1065878868103027,
"learning_rate": 4.294936453500101e-05,
"loss": 0.1331,
"step": 750
},
{
"epoch": 0.45536249251048533,
"grad_norm": 2.7355570793151855,
"learning_rate": 4.2848497074843655e-05,
"loss": 0.1188,
"step": 760
},
{
"epoch": 0.46135410425404433,
"grad_norm": 2.5642316341400146,
"learning_rate": 4.2747629614686304e-05,
"loss": 0.1161,
"step": 770
},
{
"epoch": 0.46734571599760333,
"grad_norm": 4.905550003051758,
"learning_rate": 4.264676215452895e-05,
"loss": 0.1226,
"step": 780
},
{
"epoch": 0.4733373277411624,
"grad_norm": 2.9115347862243652,
"learning_rate": 4.2545894694371595e-05,
"loss": 0.134,
"step": 790
},
{
"epoch": 0.4793289394847214,
"grad_norm": 2.8118338584899902,
"learning_rate": 4.2445027234214244e-05,
"loss": 0.1397,
"step": 800
},
{
"epoch": 0.4853205512282804,
"grad_norm": 3.0868048667907715,
"learning_rate": 4.234415977405689e-05,
"loss": 0.1349,
"step": 810
},
{
"epoch": 0.49131216297183944,
"grad_norm": 2.3112714290618896,
"learning_rate": 4.224329231389954e-05,
"loss": 0.131,
"step": 820
},
{
"epoch": 0.49730377471539844,
"grad_norm": 3.563694477081299,
"learning_rate": 4.2142424853742184e-05,
"loss": 0.1372,
"step": 830
},
{
"epoch": 0.5032953864589574,
"grad_norm": 3.5067434310913086,
"learning_rate": 4.2041557393584826e-05,
"loss": 0.1379,
"step": 840
},
{
"epoch": 0.5092869982025164,
"grad_norm": 3.358790397644043,
"learning_rate": 4.194068993342748e-05,
"loss": 0.1322,
"step": 850
},
{
"epoch": 0.5152786099460755,
"grad_norm": 2.632565498352051,
"learning_rate": 4.1839822473270124e-05,
"loss": 0.1542,
"step": 860
},
{
"epoch": 0.5212702216896345,
"grad_norm": 3.4644100666046143,
"learning_rate": 4.173895501311277e-05,
"loss": 0.1394,
"step": 870
},
{
"epoch": 0.5272618334331935,
"grad_norm": 1.9788542985916138,
"learning_rate": 4.1638087552955415e-05,
"loss": 0.1466,
"step": 880
},
{
"epoch": 0.5332534451767525,
"grad_norm": 3.182896852493286,
"learning_rate": 4.1537220092798064e-05,
"loss": 0.1325,
"step": 890
},
{
"epoch": 0.5392450569203115,
"grad_norm": 2.9272377490997314,
"learning_rate": 4.143635263264071e-05,
"loss": 0.1547,
"step": 900
},
{
"epoch": 0.5452366686638705,
"grad_norm": 3.800172805786133,
"learning_rate": 4.1335485172483355e-05,
"loss": 0.1459,
"step": 910
},
{
"epoch": 0.5512282804074295,
"grad_norm": 3.8579702377319336,
"learning_rate": 4.1234617712326004e-05,
"loss": 0.1448,
"step": 920
},
{
"epoch": 0.5572198921509887,
"grad_norm": 3.6494719982147217,
"learning_rate": 4.113375025216865e-05,
"loss": 0.13,
"step": 930
},
{
"epoch": 0.5632115038945477,
"grad_norm": 3.322023391723633,
"learning_rate": 4.10328827920113e-05,
"loss": 0.1366,
"step": 940
},
{
"epoch": 0.5692031156381067,
"grad_norm": 3.6455154418945312,
"learning_rate": 4.0932015331853944e-05,
"loss": 0.1409,
"step": 950
},
{
"epoch": 0.5751947273816657,
"grad_norm": 2.7181620597839355,
"learning_rate": 4.083114787169659e-05,
"loss": 0.1229,
"step": 960
},
{
"epoch": 0.5811863391252247,
"grad_norm": 2.1503639221191406,
"learning_rate": 4.073028041153924e-05,
"loss": 0.1163,
"step": 970
},
{
"epoch": 0.5871779508687837,
"grad_norm": 4.750929832458496,
"learning_rate": 4.0629412951381884e-05,
"loss": 0.1422,
"step": 980
},
{
"epoch": 0.5931695626123428,
"grad_norm": 2.9658565521240234,
"learning_rate": 4.052854549122453e-05,
"loss": 0.1121,
"step": 990
},
{
"epoch": 0.5991611743559018,
"grad_norm": 3.2859907150268555,
"learning_rate": 4.042767803106718e-05,
"loss": 0.1594,
"step": 1000
},
{
"epoch": 0.5991611743559018,
"eval_loss": 0.696466326713562,
"eval_runtime": 745.6448,
"eval_samples_per_second": 1.902,
"eval_steps_per_second": 0.119,
"eval_wer": 0.5007994974587402,
"step": 1000
},
{
"epoch": 0.6051527860994608,
"grad_norm": 2.541327476501465,
"learning_rate": 4.032681057090983e-05,
"loss": 0.1066,
"step": 1010
},
{
"epoch": 0.6111443978430198,
"grad_norm": 2.80812406539917,
"learning_rate": 4.022594311075247e-05,
"loss": 0.1276,
"step": 1020
},
{
"epoch": 0.6171360095865788,
"grad_norm": 3.0700008869171143,
"learning_rate": 4.0125075650595115e-05,
"loss": 0.1332,
"step": 1030
},
{
"epoch": 0.6231276213301378,
"grad_norm": 2.8067898750305176,
"learning_rate": 4.0024208190437764e-05,
"loss": 0.1253,
"step": 1040
},
{
"epoch": 0.6291192330736968,
"grad_norm": 2.7025182247161865,
"learning_rate": 3.992334073028041e-05,
"loss": 0.1325,
"step": 1050
},
{
"epoch": 0.6351108448172559,
"grad_norm": 3.257716178894043,
"learning_rate": 3.982247327012306e-05,
"loss": 0.1327,
"step": 1060
},
{
"epoch": 0.6411024565608149,
"grad_norm": 3.26241135597229,
"learning_rate": 3.9721605809965704e-05,
"loss": 0.1629,
"step": 1070
},
{
"epoch": 0.6470940683043739,
"grad_norm": 2.933842420578003,
"learning_rate": 3.962073834980835e-05,
"loss": 0.1399,
"step": 1080
},
{
"epoch": 0.6530856800479329,
"grad_norm": 4.348404407501221,
"learning_rate": 3.9519870889651e-05,
"loss": 0.1429,
"step": 1090
},
{
"epoch": 0.6590772917914919,
"grad_norm": 2.5708649158477783,
"learning_rate": 3.9419003429493644e-05,
"loss": 0.1335,
"step": 1100
},
{
"epoch": 0.6650689035350509,
"grad_norm": 3.140418291091919,
"learning_rate": 3.931813596933629e-05,
"loss": 0.1475,
"step": 1110
},
{
"epoch": 0.67106051527861,
"grad_norm": 3.433528423309326,
"learning_rate": 3.921726850917894e-05,
"loss": 0.1442,
"step": 1120
},
{
"epoch": 0.677052127022169,
"grad_norm": 3.7102956771850586,
"learning_rate": 3.911640104902159e-05,
"loss": 0.1507,
"step": 1130
},
{
"epoch": 0.683043738765728,
"grad_norm": 2.1036972999572754,
"learning_rate": 3.901553358886423e-05,
"loss": 0.1341,
"step": 1140
},
{
"epoch": 0.689035350509287,
"grad_norm": 2.8285186290740967,
"learning_rate": 3.891466612870688e-05,
"loss": 0.1357,
"step": 1150
},
{
"epoch": 0.695026962252846,
"grad_norm": 3.331232786178589,
"learning_rate": 3.881379866854953e-05,
"loss": 0.1437,
"step": 1160
},
{
"epoch": 0.701018573996405,
"grad_norm": 2.9205992221832275,
"learning_rate": 3.871293120839217e-05,
"loss": 0.1365,
"step": 1170
},
{
"epoch": 0.7070101857399641,
"grad_norm": 3.321892261505127,
"learning_rate": 3.861206374823482e-05,
"loss": 0.1468,
"step": 1180
},
{
"epoch": 0.7130017974835231,
"grad_norm": 2.895930528640747,
"learning_rate": 3.8511196288077464e-05,
"loss": 0.1272,
"step": 1190
},
{
"epoch": 0.7189934092270821,
"grad_norm": 5.071239471435547,
"learning_rate": 3.8410328827920113e-05,
"loss": 0.1467,
"step": 1200
},
{
"epoch": 0.7249850209706411,
"grad_norm": 5.675464153289795,
"learning_rate": 3.830946136776276e-05,
"loss": 0.1455,
"step": 1210
},
{
"epoch": 0.7309766327142001,
"grad_norm": 3.4301488399505615,
"learning_rate": 3.8208593907605405e-05,
"loss": 0.1495,
"step": 1220
},
{
"epoch": 0.7369682444577591,
"grad_norm": 4.892243385314941,
"learning_rate": 3.8107726447448053e-05,
"loss": 0.1581,
"step": 1230
},
{
"epoch": 0.7429598562013181,
"grad_norm": 2.309187173843384,
"learning_rate": 3.80068589872907e-05,
"loss": 0.1498,
"step": 1240
},
{
"epoch": 0.7489514679448772,
"grad_norm": 4.048982620239258,
"learning_rate": 3.790599152713335e-05,
"loss": 0.1504,
"step": 1250
},
{
"epoch": 0.7549430796884362,
"grad_norm": 3.6077523231506348,
"learning_rate": 3.7805124066975993e-05,
"loss": 0.1632,
"step": 1260
},
{
"epoch": 0.7609346914319952,
"grad_norm": 1.95468270778656,
"learning_rate": 3.770425660681864e-05,
"loss": 0.1308,
"step": 1270
},
{
"epoch": 0.7669263031755542,
"grad_norm": 3.681065082550049,
"learning_rate": 3.760338914666129e-05,
"loss": 0.1273,
"step": 1280
},
{
"epoch": 0.7729179149191132,
"grad_norm": 2.304133653640747,
"learning_rate": 3.7502521686503934e-05,
"loss": 0.1383,
"step": 1290
},
{
"epoch": 0.7789095266626722,
"grad_norm": 3.7350196838378906,
"learning_rate": 3.740165422634658e-05,
"loss": 0.122,
"step": 1300
},
{
"epoch": 0.7849011384062313,
"grad_norm": 2.206613540649414,
"learning_rate": 3.7300786766189225e-05,
"loss": 0.1353,
"step": 1310
},
{
"epoch": 0.7908927501497903,
"grad_norm": 3.012110710144043,
"learning_rate": 3.719991930603188e-05,
"loss": 0.139,
"step": 1320
},
{
"epoch": 0.7968843618933493,
"grad_norm": 3.693063497543335,
"learning_rate": 3.709905184587452e-05,
"loss": 0.1544,
"step": 1330
},
{
"epoch": 0.8028759736369083,
"grad_norm": 2.1389901638031006,
"learning_rate": 3.699818438571717e-05,
"loss": 0.1386,
"step": 1340
},
{
"epoch": 0.8088675853804673,
"grad_norm": 2.6895837783813477,
"learning_rate": 3.6897316925559814e-05,
"loss": 0.1315,
"step": 1350
},
{
"epoch": 0.8148591971240263,
"grad_norm": 2.8541972637176514,
"learning_rate": 3.679644946540246e-05,
"loss": 0.1326,
"step": 1360
},
{
"epoch": 0.8208508088675854,
"grad_norm": 3.095381259918213,
"learning_rate": 3.669558200524511e-05,
"loss": 0.1374,
"step": 1370
},
{
"epoch": 0.8268424206111444,
"grad_norm": 1.8898283243179321,
"learning_rate": 3.6594714545087754e-05,
"loss": 0.1336,
"step": 1380
},
{
"epoch": 0.8328340323547034,
"grad_norm": 3.8323473930358887,
"learning_rate": 3.64938470849304e-05,
"loss": 0.1335,
"step": 1390
},
{
"epoch": 0.8388256440982624,
"grad_norm": 2.4315149784088135,
"learning_rate": 3.639297962477305e-05,
"loss": 0.145,
"step": 1400
},
{
"epoch": 0.8448172558418214,
"grad_norm": 1.6960047483444214,
"learning_rate": 3.62921121646157e-05,
"loss": 0.1326,
"step": 1410
},
{
"epoch": 0.8508088675853804,
"grad_norm": 2.898869752883911,
"learning_rate": 3.619124470445834e-05,
"loss": 0.1263,
"step": 1420
},
{
"epoch": 0.8568004793289394,
"grad_norm": 2.389853000640869,
"learning_rate": 3.6090377244300985e-05,
"loss": 0.1337,
"step": 1430
},
{
"epoch": 0.8627920910724985,
"grad_norm": 2.521010398864746,
"learning_rate": 3.598950978414364e-05,
"loss": 0.1218,
"step": 1440
},
{
"epoch": 0.8687837028160575,
"grad_norm": 3.900918960571289,
"learning_rate": 3.588864232398628e-05,
"loss": 0.1161,
"step": 1450
},
{
"epoch": 0.8747753145596165,
"grad_norm": 3.4358227252960205,
"learning_rate": 3.578777486382893e-05,
"loss": 0.1311,
"step": 1460
},
{
"epoch": 0.8807669263031755,
"grad_norm": 2.923558473587036,
"learning_rate": 3.5686907403671574e-05,
"loss": 0.1359,
"step": 1470
},
{
"epoch": 0.8867585380467345,
"grad_norm": 2.5613369941711426,
"learning_rate": 3.558603994351423e-05,
"loss": 0.1126,
"step": 1480
},
{
"epoch": 0.8927501497902935,
"grad_norm": 2.89349627494812,
"learning_rate": 3.548517248335687e-05,
"loss": 0.1473,
"step": 1490
},
{
"epoch": 0.8987417615338527,
"grad_norm": 2.973292112350464,
"learning_rate": 3.5384305023199514e-05,
"loss": 0.1485,
"step": 1500
},
{
"epoch": 0.9047333732774117,
"grad_norm": 2.166511058807373,
"learning_rate": 3.528343756304216e-05,
"loss": 0.1242,
"step": 1510
},
{
"epoch": 0.9107249850209707,
"grad_norm": 2.6223630905151367,
"learning_rate": 3.518257010288481e-05,
"loss": 0.1255,
"step": 1520
},
{
"epoch": 0.9167165967645297,
"grad_norm": 2.9496703147888184,
"learning_rate": 3.508170264272746e-05,
"loss": 0.1303,
"step": 1530
},
{
"epoch": 0.9227082085080887,
"grad_norm": 3.1444201469421387,
"learning_rate": 3.49808351825701e-05,
"loss": 0.1549,
"step": 1540
},
{
"epoch": 0.9286998202516477,
"grad_norm": 4.5898590087890625,
"learning_rate": 3.487996772241275e-05,
"loss": 0.1306,
"step": 1550
},
{
"epoch": 0.9346914319952067,
"grad_norm": 2.1030378341674805,
"learning_rate": 3.47791002622554e-05,
"loss": 0.1185,
"step": 1560
},
{
"epoch": 0.9406830437387658,
"grad_norm": 2.828355550765991,
"learning_rate": 3.467823280209804e-05,
"loss": 0.135,
"step": 1570
},
{
"epoch": 0.9466746554823248,
"grad_norm": 1.8146847486495972,
"learning_rate": 3.457736534194069e-05,
"loss": 0.1157,
"step": 1580
},
{
"epoch": 0.9526662672258838,
"grad_norm": 5.510678768157959,
"learning_rate": 3.4476497881783334e-05,
"loss": 0.1204,
"step": 1590
},
{
"epoch": 0.9586578789694428,
"grad_norm": 3.572500467300415,
"learning_rate": 3.437563042162599e-05,
"loss": 0.1465,
"step": 1600
},
{
"epoch": 0.9646494907130018,
"grad_norm": 2.8663079738616943,
"learning_rate": 3.427476296146863e-05,
"loss": 0.1101,
"step": 1610
},
{
"epoch": 0.9706411024565608,
"grad_norm": 2.5090837478637695,
"learning_rate": 3.4173895501311274e-05,
"loss": 0.1127,
"step": 1620
},
{
"epoch": 0.9766327142001199,
"grad_norm": 2.927410840988159,
"learning_rate": 3.407302804115392e-05,
"loss": 0.1496,
"step": 1630
},
{
"epoch": 0.9826243259436789,
"grad_norm": 2.375161647796631,
"learning_rate": 3.397216058099657e-05,
"loss": 0.1243,
"step": 1640
},
{
"epoch": 0.9886159376872379,
"grad_norm": 3.1424996852874756,
"learning_rate": 3.387129312083922e-05,
"loss": 0.1285,
"step": 1650
},
{
"epoch": 0.9946075494307969,
"grad_norm": 2.785811185836792,
"learning_rate": 3.377042566068186e-05,
"loss": 0.1286,
"step": 1660
},
{
"epoch": 1.0005991611743559,
"grad_norm": 2.26493763923645,
"learning_rate": 3.366955820052451e-05,
"loss": 0.1366,
"step": 1670
},
{
"epoch": 1.0065907729179149,
"grad_norm": 1.9755520820617676,
"learning_rate": 3.356869074036716e-05,
"loss": 0.0766,
"step": 1680
},
{
"epoch": 1.0125823846614739,
"grad_norm": 2.0320472717285156,
"learning_rate": 3.34678232802098e-05,
"loss": 0.0777,
"step": 1690
},
{
"epoch": 1.0185739964050329,
"grad_norm": 2.279986619949341,
"learning_rate": 3.336695582005245e-05,
"loss": 0.0767,
"step": 1700
},
{
"epoch": 1.0245656081485919,
"grad_norm": 1.7731980085372925,
"learning_rate": 3.32660883598951e-05,
"loss": 0.0922,
"step": 1710
},
{
"epoch": 1.030557219892151,
"grad_norm": 2.6267566680908203,
"learning_rate": 3.316522089973775e-05,
"loss": 0.0822,
"step": 1720
},
{
"epoch": 1.03654883163571,
"grad_norm": 2.804668426513672,
"learning_rate": 3.306435343958039e-05,
"loss": 0.0855,
"step": 1730
},
{
"epoch": 1.042540443379269,
"grad_norm": 1.526654601097107,
"learning_rate": 3.296348597942304e-05,
"loss": 0.066,
"step": 1740
},
{
"epoch": 1.048532055122828,
"grad_norm": 2.8683853149414062,
"learning_rate": 3.286261851926569e-05,
"loss": 0.0811,
"step": 1750
},
{
"epoch": 1.054523666866387,
"grad_norm": 1.9529556035995483,
"learning_rate": 3.276175105910833e-05,
"loss": 0.0823,
"step": 1760
},
{
"epoch": 1.060515278609946,
"grad_norm": 2.4281697273254395,
"learning_rate": 3.266088359895098e-05,
"loss": 0.0948,
"step": 1770
},
{
"epoch": 1.066506890353505,
"grad_norm": 2.2370128631591797,
"learning_rate": 3.256001613879362e-05,
"loss": 0.0793,
"step": 1780
},
{
"epoch": 1.072498502097064,
"grad_norm": 3.237933397293091,
"learning_rate": 3.245914867863627e-05,
"loss": 0.0889,
"step": 1790
},
{
"epoch": 1.078490113840623,
"grad_norm": 2.237818479537964,
"learning_rate": 3.235828121847892e-05,
"loss": 0.0739,
"step": 1800
},
{
"epoch": 1.084481725584182,
"grad_norm": 2.257967710494995,
"learning_rate": 3.225741375832157e-05,
"loss": 0.0847,
"step": 1810
},
{
"epoch": 1.090473337327741,
"grad_norm": 2.68813419342041,
"learning_rate": 3.215654629816421e-05,
"loss": 0.0924,
"step": 1820
},
{
"epoch": 1.0964649490713,
"grad_norm": 3.0873122215270996,
"learning_rate": 3.205567883800686e-05,
"loss": 0.0766,
"step": 1830
},
{
"epoch": 1.102456560814859,
"grad_norm": 2.305025339126587,
"learning_rate": 3.195481137784951e-05,
"loss": 0.0799,
"step": 1840
},
{
"epoch": 1.1084481725584183,
"grad_norm": 2.242445707321167,
"learning_rate": 3.185394391769215e-05,
"loss": 0.0928,
"step": 1850
},
{
"epoch": 1.1144397843019773,
"grad_norm": 3.2711095809936523,
"learning_rate": 3.17530764575348e-05,
"loss": 0.0863,
"step": 1860
},
{
"epoch": 1.1204313960455363,
"grad_norm": 2.1616060733795166,
"learning_rate": 3.165220899737745e-05,
"loss": 0.0939,
"step": 1870
},
{
"epoch": 1.1264230077890953,
"grad_norm": 1.922755479812622,
"learning_rate": 3.155134153722009e-05,
"loss": 0.0864,
"step": 1880
},
{
"epoch": 1.1324146195326543,
"grad_norm": 2.7162983417510986,
"learning_rate": 3.145047407706274e-05,
"loss": 0.0804,
"step": 1890
},
{
"epoch": 1.1384062312762133,
"grad_norm": 1.7950539588928223,
"learning_rate": 3.134960661690538e-05,
"loss": 0.076,
"step": 1900
},
{
"epoch": 1.1443978430197723,
"grad_norm": 1.889122724533081,
"learning_rate": 3.124873915674804e-05,
"loss": 0.0725,
"step": 1910
},
{
"epoch": 1.1503894547633313,
"grad_norm": 1.9550236463546753,
"learning_rate": 3.114787169659068e-05,
"loss": 0.0781,
"step": 1920
},
{
"epoch": 1.1563810665068903,
"grad_norm": 2.5608322620391846,
"learning_rate": 3.104700423643333e-05,
"loss": 0.0786,
"step": 1930
},
{
"epoch": 1.1623726782504493,
"grad_norm": 2.287541389465332,
"learning_rate": 3.094613677627597e-05,
"loss": 0.0773,
"step": 1940
},
{
"epoch": 1.1683642899940083,
"grad_norm": 2.689326286315918,
"learning_rate": 3.084526931611862e-05,
"loss": 0.0778,
"step": 1950
},
{
"epoch": 1.1743559017375673,
"grad_norm": 2.6887409687042236,
"learning_rate": 3.074440185596127e-05,
"loss": 0.0734,
"step": 1960
},
{
"epoch": 1.1803475134811263,
"grad_norm": 2.236410140991211,
"learning_rate": 3.064353439580391e-05,
"loss": 0.0638,
"step": 1970
},
{
"epoch": 1.1863391252246855,
"grad_norm": 1.5748876333236694,
"learning_rate": 3.054266693564656e-05,
"loss": 0.0755,
"step": 1980
},
{
"epoch": 1.1923307369682445,
"grad_norm": 2.5412073135375977,
"learning_rate": 3.0441799475489207e-05,
"loss": 0.0843,
"step": 1990
},
{
"epoch": 1.1983223487118035,
"grad_norm": 2.379398822784424,
"learning_rate": 3.034093201533186e-05,
"loss": 0.0665,
"step": 2000
},
{
"epoch": 1.1983223487118035,
"eval_loss": 0.7183927297592163,
"eval_runtime": 684.2755,
"eval_samples_per_second": 2.072,
"eval_steps_per_second": 0.13,
"eval_wer": 0.49623094055165323,
"step": 2000
},
{
"epoch": 1.2043139604553625,
"grad_norm": 2.496299982070923,
"learning_rate": 3.02400645551745e-05,
"loss": 0.0914,
"step": 2010
},
{
"epoch": 1.2103055721989215,
"grad_norm": 3.0780892372131348,
"learning_rate": 3.0139197095017147e-05,
"loss": 0.0851,
"step": 2020
},
{
"epoch": 1.2162971839424805,
"grad_norm": 2.4244396686553955,
"learning_rate": 3.0038329634859796e-05,
"loss": 0.0668,
"step": 2030
},
{
"epoch": 1.2222887956860395,
"grad_norm": 3.1019625663757324,
"learning_rate": 2.993746217470244e-05,
"loss": 0.0653,
"step": 2040
},
{
"epoch": 1.2282804074295985,
"grad_norm": 2.498575448989868,
"learning_rate": 2.983659471454509e-05,
"loss": 0.0732,
"step": 2050
},
{
"epoch": 1.2342720191731575,
"grad_norm": 2.5372304916381836,
"learning_rate": 2.9735727254387736e-05,
"loss": 0.0866,
"step": 2060
},
{
"epoch": 1.2402636309167165,
"grad_norm": 1.5571708679199219,
"learning_rate": 2.9634859794230385e-05,
"loss": 0.0845,
"step": 2070
},
{
"epoch": 1.2462552426602755,
"grad_norm": 2.367034912109375,
"learning_rate": 2.953399233407303e-05,
"loss": 0.0787,
"step": 2080
},
{
"epoch": 1.2522468544038348,
"grad_norm": 3.7114803791046143,
"learning_rate": 2.9433124873915672e-05,
"loss": 0.0858,
"step": 2090
},
{
"epoch": 1.2582384661473935,
"grad_norm": 2.6696841716766357,
"learning_rate": 2.9332257413758325e-05,
"loss": 0.0976,
"step": 2100
},
{
"epoch": 1.2642300778909528,
"grad_norm": 3.041776418685913,
"learning_rate": 2.9231389953600967e-05,
"loss": 0.0658,
"step": 2110
},
{
"epoch": 1.2702216896345118,
"grad_norm": 3.1914546489715576,
"learning_rate": 2.913052249344362e-05,
"loss": 0.0753,
"step": 2120
},
{
"epoch": 1.2762133013780708,
"grad_norm": 1.524961233139038,
"learning_rate": 2.902965503328626e-05,
"loss": 0.0791,
"step": 2130
},
{
"epoch": 1.2822049131216298,
"grad_norm": 2.4494245052337646,
"learning_rate": 2.8928787573128914e-05,
"loss": 0.0707,
"step": 2140
},
{
"epoch": 1.2881965248651888,
"grad_norm": 3.0455808639526367,
"learning_rate": 2.8827920112971556e-05,
"loss": 0.0853,
"step": 2150
},
{
"epoch": 1.2941881366087478,
"grad_norm": 2.0262930393218994,
"learning_rate": 2.87270526528142e-05,
"loss": 0.0704,
"step": 2160
},
{
"epoch": 1.3001797483523068,
"grad_norm": 1.6224325895309448,
"learning_rate": 2.862618519265685e-05,
"loss": 0.077,
"step": 2170
},
{
"epoch": 1.3061713600958658,
"grad_norm": 2.560068130493164,
"learning_rate": 2.8525317732499496e-05,
"loss": 0.0845,
"step": 2180
},
{
"epoch": 1.3121629718394248,
"grad_norm": 2.7368597984313965,
"learning_rate": 2.8424450272342145e-05,
"loss": 0.0777,
"step": 2190
},
{
"epoch": 1.3181545835829838,
"grad_norm": 2.2715647220611572,
"learning_rate": 2.832358281218479e-05,
"loss": 0.0748,
"step": 2200
},
{
"epoch": 1.3241461953265428,
"grad_norm": 2.4632680416107178,
"learning_rate": 2.8222715352027436e-05,
"loss": 0.0874,
"step": 2210
},
{
"epoch": 1.330137807070102,
"grad_norm": 2.501645088195801,
"learning_rate": 2.8121847891870085e-05,
"loss": 0.0774,
"step": 2220
},
{
"epoch": 1.3361294188136608,
"grad_norm": 1.52916419506073,
"learning_rate": 2.802098043171273e-05,
"loss": 0.0685,
"step": 2230
},
{
"epoch": 1.34212103055722,
"grad_norm": 1.6257095336914062,
"learning_rate": 2.792011297155538e-05,
"loss": 0.0667,
"step": 2240
},
{
"epoch": 1.348112642300779,
"grad_norm": 2.855592966079712,
"learning_rate": 2.781924551139802e-05,
"loss": 0.0832,
"step": 2250
},
{
"epoch": 1.354104254044338,
"grad_norm": 2.8703014850616455,
"learning_rate": 2.7718378051240674e-05,
"loss": 0.068,
"step": 2260
},
{
"epoch": 1.360095865787897,
"grad_norm": 2.3611278533935547,
"learning_rate": 2.7617510591083316e-05,
"loss": 0.0751,
"step": 2270
},
{
"epoch": 1.366087477531456,
"grad_norm": 1.7880631685256958,
"learning_rate": 2.751664313092596e-05,
"loss": 0.0724,
"step": 2280
},
{
"epoch": 1.372079089275015,
"grad_norm": 1.9004284143447876,
"learning_rate": 2.741577567076861e-05,
"loss": 0.0909,
"step": 2290
},
{
"epoch": 1.378070701018574,
"grad_norm": 2.208883047103882,
"learning_rate": 2.7314908210611256e-05,
"loss": 0.0671,
"step": 2300
},
{
"epoch": 1.384062312762133,
"grad_norm": 1.8540887832641602,
"learning_rate": 2.7214040750453905e-05,
"loss": 0.0861,
"step": 2310
},
{
"epoch": 1.390053924505692,
"grad_norm": 2.8379740715026855,
"learning_rate": 2.711317329029655e-05,
"loss": 0.0714,
"step": 2320
},
{
"epoch": 1.396045536249251,
"grad_norm": 2.2230288982391357,
"learning_rate": 2.70123058301392e-05,
"loss": 0.077,
"step": 2330
},
{
"epoch": 1.40203714799281,
"grad_norm": 3.1027259826660156,
"learning_rate": 2.6911438369981845e-05,
"loss": 0.0796,
"step": 2340
},
{
"epoch": 1.4080287597363692,
"grad_norm": 2.489982843399048,
"learning_rate": 2.681057090982449e-05,
"loss": 0.0727,
"step": 2350
},
{
"epoch": 1.414020371479928,
"grad_norm": 2.224640130996704,
"learning_rate": 2.670970344966714e-05,
"loss": 0.0839,
"step": 2360
},
{
"epoch": 1.4200119832234872,
"grad_norm": 2.718700647354126,
"learning_rate": 2.6608835989509785e-05,
"loss": 0.075,
"step": 2370
},
{
"epoch": 1.4260035949670462,
"grad_norm": 2.6818034648895264,
"learning_rate": 2.6507968529352434e-05,
"loss": 0.0734,
"step": 2380
},
{
"epoch": 1.4319952067106052,
"grad_norm": 1.8535293340682983,
"learning_rate": 2.640710106919508e-05,
"loss": 0.076,
"step": 2390
},
{
"epoch": 1.4379868184541642,
"grad_norm": 2.8081820011138916,
"learning_rate": 2.630623360903773e-05,
"loss": 0.0707,
"step": 2400
},
{
"epoch": 1.4439784301977232,
"grad_norm": 2.0238046646118164,
"learning_rate": 2.6205366148880374e-05,
"loss": 0.0765,
"step": 2410
},
{
"epoch": 1.4499700419412822,
"grad_norm": 2.9945600032806396,
"learning_rate": 2.6104498688723016e-05,
"loss": 0.0907,
"step": 2420
},
{
"epoch": 1.4559616536848412,
"grad_norm": 3.8100576400756836,
"learning_rate": 2.6003631228565665e-05,
"loss": 0.0873,
"step": 2430
},
{
"epoch": 1.4619532654284002,
"grad_norm": 1.8465749025344849,
"learning_rate": 2.590276376840831e-05,
"loss": 0.0646,
"step": 2440
},
{
"epoch": 1.4679448771719592,
"grad_norm": 2.084381103515625,
"learning_rate": 2.580189630825096e-05,
"loss": 0.0697,
"step": 2450
},
{
"epoch": 1.4739364889155182,
"grad_norm": 2.089414358139038,
"learning_rate": 2.5701028848093605e-05,
"loss": 0.0697,
"step": 2460
},
{
"epoch": 1.4799281006590772,
"grad_norm": 2.39705491065979,
"learning_rate": 2.5600161387936254e-05,
"loss": 0.0822,
"step": 2470
},
{
"epoch": 1.4859197124026364,
"grad_norm": 2.2949612140655518,
"learning_rate": 2.54992939277789e-05,
"loss": 0.0883,
"step": 2480
},
{
"epoch": 1.4919113241461952,
"grad_norm": 1.9815038442611694,
"learning_rate": 2.5398426467621545e-05,
"loss": 0.0712,
"step": 2490
},
{
"epoch": 1.4979029358897544,
"grad_norm": 2.18231201171875,
"learning_rate": 2.5297559007464194e-05,
"loss": 0.09,
"step": 2500
},
{
"epoch": 1.5038945476333132,
"grad_norm": 2.602478504180908,
"learning_rate": 2.519669154730684e-05,
"loss": 0.0837,
"step": 2510
},
{
"epoch": 1.5098861593768724,
"grad_norm": 1.8262310028076172,
"learning_rate": 2.509582408714949e-05,
"loss": 0.0812,
"step": 2520
},
{
"epoch": 1.5158777711204314,
"grad_norm": 2.6024467945098877,
"learning_rate": 2.4994956626992134e-05,
"loss": 0.0747,
"step": 2530
},
{
"epoch": 1.5218693828639904,
"grad_norm": 2.4434356689453125,
"learning_rate": 2.489408916683478e-05,
"loss": 0.0883,
"step": 2540
},
{
"epoch": 1.5278609946075494,
"grad_norm": 2.055715322494507,
"learning_rate": 2.479322170667743e-05,
"loss": 0.0709,
"step": 2550
},
{
"epoch": 1.5338526063511084,
"grad_norm": 2.179154634475708,
"learning_rate": 2.4692354246520074e-05,
"loss": 0.084,
"step": 2560
},
{
"epoch": 1.5398442180946674,
"grad_norm": 3.096709728240967,
"learning_rate": 2.4591486786362723e-05,
"loss": 0.0851,
"step": 2570
},
{
"epoch": 1.5458358298382264,
"grad_norm": 2.3316915035247803,
"learning_rate": 2.4490619326205365e-05,
"loss": 0.0834,
"step": 2580
},
{
"epoch": 1.5518274415817856,
"grad_norm": 1.9087728261947632,
"learning_rate": 2.4389751866048014e-05,
"loss": 0.0761,
"step": 2590
},
{
"epoch": 1.5578190533253444,
"grad_norm": 2.3828837871551514,
"learning_rate": 2.428888440589066e-05,
"loss": 0.0888,
"step": 2600
},
{
"epoch": 1.5638106650689036,
"grad_norm": 2.2623255252838135,
"learning_rate": 2.418801694573331e-05,
"loss": 0.0905,
"step": 2610
},
{
"epoch": 1.5698022768124624,
"grad_norm": 2.371645212173462,
"learning_rate": 2.4087149485575954e-05,
"loss": 0.0674,
"step": 2620
},
{
"epoch": 1.5757938885560216,
"grad_norm": 3.3547451496124268,
"learning_rate": 2.3986282025418603e-05,
"loss": 0.0785,
"step": 2630
},
{
"epoch": 1.5817855002995804,
"grad_norm": 1.9994959831237793,
"learning_rate": 2.3885414565261246e-05,
"loss": 0.071,
"step": 2640
},
{
"epoch": 1.5877771120431396,
"grad_norm": 2.294867992401123,
"learning_rate": 2.3784547105103894e-05,
"loss": 0.0733,
"step": 2650
},
{
"epoch": 1.5937687237866986,
"grad_norm": 1.9338445663452148,
"learning_rate": 2.368367964494654e-05,
"loss": 0.0827,
"step": 2660
},
{
"epoch": 1.5997603355302576,
"grad_norm": 2.6823794841766357,
"learning_rate": 2.358281218478919e-05,
"loss": 0.0698,
"step": 2670
},
{
"epoch": 1.6057519472738166,
"grad_norm": 3.147641181945801,
"learning_rate": 2.3481944724631835e-05,
"loss": 0.0884,
"step": 2680
},
{
"epoch": 1.6117435590173756,
"grad_norm": 1.8961377143859863,
"learning_rate": 2.3381077264474483e-05,
"loss": 0.0711,
"step": 2690
},
{
"epoch": 1.6177351707609346,
"grad_norm": 3.8997535705566406,
"learning_rate": 2.328020980431713e-05,
"loss": 0.0672,
"step": 2700
},
{
"epoch": 1.6237267825044936,
"grad_norm": 2.460813283920288,
"learning_rate": 2.3179342344159775e-05,
"loss": 0.0849,
"step": 2710
},
{
"epoch": 1.6297183942480529,
"grad_norm": 2.1751198768615723,
"learning_rate": 2.307847488400242e-05,
"loss": 0.0921,
"step": 2720
},
{
"epoch": 1.6357100059916116,
"grad_norm": 2.415151596069336,
"learning_rate": 2.297760742384507e-05,
"loss": 0.0747,
"step": 2730
},
{
"epoch": 1.6417016177351709,
"grad_norm": 2.236166477203369,
"learning_rate": 2.2876739963687715e-05,
"loss": 0.0777,
"step": 2740
},
{
"epoch": 1.6476932294787296,
"grad_norm": 1.8858668804168701,
"learning_rate": 2.2775872503530364e-05,
"loss": 0.0641,
"step": 2750
},
{
"epoch": 1.6536848412222889,
"grad_norm": 3.2240684032440186,
"learning_rate": 2.267500504337301e-05,
"loss": 0.0656,
"step": 2760
},
{
"epoch": 1.6596764529658476,
"grad_norm": 2.5699257850646973,
"learning_rate": 2.2574137583215658e-05,
"loss": 0.0718,
"step": 2770
},
{
"epoch": 1.6656680647094069,
"grad_norm": 2.1655499935150146,
"learning_rate": 2.24732701230583e-05,
"loss": 0.0737,
"step": 2780
},
{
"epoch": 1.6716596764529659,
"grad_norm": 2.6293349266052246,
"learning_rate": 2.237240266290095e-05,
"loss": 0.0742,
"step": 2790
},
{
"epoch": 1.6776512881965249,
"grad_norm": 1.8740370273590088,
"learning_rate": 2.2271535202743595e-05,
"loss": 0.0577,
"step": 2800
},
{
"epoch": 1.6836428999400839,
"grad_norm": 2.1657564640045166,
"learning_rate": 2.2170667742586244e-05,
"loss": 0.0703,
"step": 2810
},
{
"epoch": 1.6896345116836429,
"grad_norm": 2.1452126502990723,
"learning_rate": 2.206980028242889e-05,
"loss": 0.0838,
"step": 2820
},
{
"epoch": 1.6956261234272019,
"grad_norm": 2.4039194583892822,
"learning_rate": 2.1968932822271538e-05,
"loss": 0.092,
"step": 2830
},
{
"epoch": 1.7016177351707609,
"grad_norm": 2.0987935066223145,
"learning_rate": 2.186806536211418e-05,
"loss": 0.0625,
"step": 2840
},
{
"epoch": 1.70760934691432,
"grad_norm": 1.980102777481079,
"learning_rate": 2.176719790195683e-05,
"loss": 0.0812,
"step": 2850
},
{
"epoch": 1.7136009586578789,
"grad_norm": 3.052550792694092,
"learning_rate": 2.1666330441799475e-05,
"loss": 0.069,
"step": 2860
},
{
"epoch": 1.719592570401438,
"grad_norm": 2.373227834701538,
"learning_rate": 2.1565462981642124e-05,
"loss": 0.072,
"step": 2870
},
{
"epoch": 1.7255841821449969,
"grad_norm": 3.045281410217285,
"learning_rate": 2.146459552148477e-05,
"loss": 0.0702,
"step": 2880
},
{
"epoch": 1.731575793888556,
"grad_norm": 2.5577785968780518,
"learning_rate": 2.1363728061327418e-05,
"loss": 0.0785,
"step": 2890
},
{
"epoch": 1.737567405632115,
"grad_norm": 2.3629047870635986,
"learning_rate": 2.1262860601170064e-05,
"loss": 0.0754,
"step": 2900
},
{
"epoch": 1.743559017375674,
"grad_norm": 2.157334089279175,
"learning_rate": 2.116199314101271e-05,
"loss": 0.0644,
"step": 2910
},
{
"epoch": 1.749550629119233,
"grad_norm": 2.86433482170105,
"learning_rate": 2.1061125680855355e-05,
"loss": 0.0834,
"step": 2920
},
{
"epoch": 1.755542240862792,
"grad_norm": 3.452363967895508,
"learning_rate": 2.0960258220698004e-05,
"loss": 0.0682,
"step": 2930
},
{
"epoch": 1.761533852606351,
"grad_norm": 1.9637304544448853,
"learning_rate": 2.085939076054065e-05,
"loss": 0.0663,
"step": 2940
},
{
"epoch": 1.76752546434991,
"grad_norm": 2.607135772705078,
"learning_rate": 2.0758523300383298e-05,
"loss": 0.0865,
"step": 2950
},
{
"epoch": 1.773517076093469,
"grad_norm": 3.233673095703125,
"learning_rate": 2.0657655840225944e-05,
"loss": 0.07,
"step": 2960
},
{
"epoch": 1.779508687837028,
"grad_norm": 1.5085541009902954,
"learning_rate": 2.055678838006859e-05,
"loss": 0.0676,
"step": 2970
},
{
"epoch": 1.7855002995805873,
"grad_norm": 2.5728871822357178,
"learning_rate": 2.045592091991124e-05,
"loss": 0.0674,
"step": 2980
},
{
"epoch": 1.791491911324146,
"grad_norm": 2.0894975662231445,
"learning_rate": 2.0355053459753884e-05,
"loss": 0.0681,
"step": 2990
},
{
"epoch": 1.7974835230677053,
"grad_norm": 3.2060320377349854,
"learning_rate": 2.025418599959653e-05,
"loss": 0.0767,
"step": 3000
},
{
"epoch": 1.7974835230677053,
"eval_loss": 0.7448650002479553,
"eval_runtime": 676.6864,
"eval_samples_per_second": 2.096,
"eval_steps_per_second": 0.132,
"eval_wer": 0.460510536234367,
"step": 3000
},
{
"epoch": 1.803475134811264,
"grad_norm": 1.9069411754608154,
"learning_rate": 2.015331853943918e-05,
"loss": 0.0594,
"step": 3010
},
{
"epoch": 1.8094667465548233,
"grad_norm": 3.2389609813690186,
"learning_rate": 2.0052451079281824e-05,
"loss": 0.0869,
"step": 3020
},
{
"epoch": 1.8154583582983823,
"grad_norm": 2.8738017082214355,
"learning_rate": 1.9951583619124473e-05,
"loss": 0.068,
"step": 3030
},
{
"epoch": 1.8214499700419413,
"grad_norm": 2.5264780521392822,
"learning_rate": 1.985071615896712e-05,
"loss": 0.0745,
"step": 3040
},
{
"epoch": 1.8274415817855003,
"grad_norm": 1.7072393894195557,
"learning_rate": 1.9749848698809764e-05,
"loss": 0.0732,
"step": 3050
},
{
"epoch": 1.8334331935290593,
"grad_norm": 4.048574447631836,
"learning_rate": 1.9648981238652413e-05,
"loss": 0.0676,
"step": 3060
},
{
"epoch": 1.8394248052726183,
"grad_norm": 2.7415385246276855,
"learning_rate": 1.954811377849506e-05,
"loss": 0.0658,
"step": 3070
},
{
"epoch": 1.8454164170161773,
"grad_norm": 1.9306994676589966,
"learning_rate": 1.9447246318337707e-05,
"loss": 0.0841,
"step": 3080
},
{
"epoch": 1.8514080287597365,
"grad_norm": 2.8990273475646973,
"learning_rate": 1.9346378858180353e-05,
"loss": 0.0743,
"step": 3090
},
{
"epoch": 1.8573996405032953,
"grad_norm": 2.0166501998901367,
"learning_rate": 1.9245511398023e-05,
"loss": 0.079,
"step": 3100
},
{
"epoch": 1.8633912522468545,
"grad_norm": 2.3217945098876953,
"learning_rate": 1.9144643937865644e-05,
"loss": 0.0645,
"step": 3110
},
{
"epoch": 1.8693828639904133,
"grad_norm": 2.787743330001831,
"learning_rate": 1.9043776477708293e-05,
"loss": 0.0614,
"step": 3120
},
{
"epoch": 1.8753744757339725,
"grad_norm": 1.7940049171447754,
"learning_rate": 1.894290901755094e-05,
"loss": 0.0607,
"step": 3130
},
{
"epoch": 1.8813660874775313,
"grad_norm": 2.0448403358459473,
"learning_rate": 1.8842041557393587e-05,
"loss": 0.0784,
"step": 3140
},
{
"epoch": 1.8873576992210905,
"grad_norm": 2.265869379043579,
"learning_rate": 1.8741174097236233e-05,
"loss": 0.0661,
"step": 3150
},
{
"epoch": 1.8933493109646495,
"grad_norm": 1.9905105829238892,
"learning_rate": 1.8640306637078882e-05,
"loss": 0.0667,
"step": 3160
},
{
"epoch": 1.8993409227082085,
"grad_norm": 1.5427712202072144,
"learning_rate": 1.8539439176921524e-05,
"loss": 0.0662,
"step": 3170
},
{
"epoch": 1.9053325344517675,
"grad_norm": 1.6274867057800293,
"learning_rate": 1.8438571716764173e-05,
"loss": 0.0657,
"step": 3180
},
{
"epoch": 1.9113241461953265,
"grad_norm": 1.5048216581344604,
"learning_rate": 1.833770425660682e-05,
"loss": 0.0741,
"step": 3190
},
{
"epoch": 1.9173157579388855,
"grad_norm": 2.2717835903167725,
"learning_rate": 1.8236836796449468e-05,
"loss": 0.0601,
"step": 3200
},
{
"epoch": 1.9233073696824445,
"grad_norm": 2.6843059062957764,
"learning_rate": 1.8135969336292113e-05,
"loss": 0.0697,
"step": 3210
},
{
"epoch": 1.9292989814260038,
"grad_norm": 1.437799334526062,
"learning_rate": 1.8035101876134762e-05,
"loss": 0.0758,
"step": 3220
},
{
"epoch": 1.9352905931695625,
"grad_norm": 2.8273918628692627,
"learning_rate": 1.7934234415977408e-05,
"loss": 0.0792,
"step": 3230
},
{
"epoch": 1.9412822049131218,
"grad_norm": 2.177579641342163,
"learning_rate": 1.7833366955820053e-05,
"loss": 0.0659,
"step": 3240
},
{
"epoch": 1.9472738166566805,
"grad_norm": 1.6548727750778198,
"learning_rate": 1.77324994956627e-05,
"loss": 0.064,
"step": 3250
},
{
"epoch": 1.9532654284002398,
"grad_norm": 2.316882371902466,
"learning_rate": 1.7631632035505348e-05,
"loss": 0.0601,
"step": 3260
},
{
"epoch": 1.9592570401437985,
"grad_norm": 2.911669969558716,
"learning_rate": 1.7530764575347993e-05,
"loss": 0.058,
"step": 3270
},
{
"epoch": 1.9652486518873578,
"grad_norm": 2.2127685546875,
"learning_rate": 1.7429897115190642e-05,
"loss": 0.0745,
"step": 3280
},
{
"epoch": 1.9712402636309168,
"grad_norm": 1.9832196235656738,
"learning_rate": 1.7329029655033288e-05,
"loss": 0.0723,
"step": 3290
},
{
"epoch": 1.9772318753744758,
"grad_norm": 2.8020925521850586,
"learning_rate": 1.7228162194875933e-05,
"loss": 0.0676,
"step": 3300
},
{
"epoch": 1.9832234871180348,
"grad_norm": 3.036353349685669,
"learning_rate": 1.712729473471858e-05,
"loss": 0.0776,
"step": 3310
},
{
"epoch": 1.9892150988615938,
"grad_norm": 1.0084120035171509,
"learning_rate": 1.7026427274561228e-05,
"loss": 0.0559,
"step": 3320
},
{
"epoch": 1.9952067106051528,
"grad_norm": 3.1513946056365967,
"learning_rate": 1.6925559814403873e-05,
"loss": 0.0773,
"step": 3330
},
{
"epoch": 2.0011983223487118,
"grad_norm": 1.6514886617660522,
"learning_rate": 1.6824692354246522e-05,
"loss": 0.0555,
"step": 3340
},
{
"epoch": 2.007189934092271,
"grad_norm": 1.4943495988845825,
"learning_rate": 1.6723824894089168e-05,
"loss": 0.0401,
"step": 3350
},
{
"epoch": 2.0131815458358298,
"grad_norm": 2.2410435676574707,
"learning_rate": 1.6622957433931817e-05,
"loss": 0.0395,
"step": 3360
},
{
"epoch": 2.019173157579389,
"grad_norm": 1.5972563028335571,
"learning_rate": 1.652208997377446e-05,
"loss": 0.0365,
"step": 3370
},
{
"epoch": 2.0251647693229478,
"grad_norm": 1.4300589561462402,
"learning_rate": 1.6421222513617108e-05,
"loss": 0.0361,
"step": 3380
},
{
"epoch": 2.031156381066507,
"grad_norm": 1.1573065519332886,
"learning_rate": 1.6320355053459753e-05,
"loss": 0.0401,
"step": 3390
},
{
"epoch": 2.0371479928100658,
"grad_norm": 1.3025338649749756,
"learning_rate": 1.6219487593302402e-05,
"loss": 0.0295,
"step": 3400
},
{
"epoch": 2.043139604553625,
"grad_norm": 2.5381758213043213,
"learning_rate": 1.6118620133145048e-05,
"loss": 0.0371,
"step": 3410
},
{
"epoch": 2.0491312162971838,
"grad_norm": 2.166651487350464,
"learning_rate": 1.6017752672987697e-05,
"loss": 0.0335,
"step": 3420
},
{
"epoch": 2.055122828040743,
"grad_norm": 1.688333511352539,
"learning_rate": 1.5916885212830342e-05,
"loss": 0.0399,
"step": 3430
},
{
"epoch": 2.061114439784302,
"grad_norm": 2.373098611831665,
"learning_rate": 1.5816017752672988e-05,
"loss": 0.036,
"step": 3440
},
{
"epoch": 2.067106051527861,
"grad_norm": 2.6142423152923584,
"learning_rate": 1.5715150292515633e-05,
"loss": 0.0377,
"step": 3450
},
{
"epoch": 2.07309766327142,
"grad_norm": 1.7786281108856201,
"learning_rate": 1.5614282832358282e-05,
"loss": 0.0365,
"step": 3460
},
{
"epoch": 2.079089275014979,
"grad_norm": 1.6953134536743164,
"learning_rate": 1.5513415372200928e-05,
"loss": 0.0401,
"step": 3470
},
{
"epoch": 2.085080886758538,
"grad_norm": 2.5490822792053223,
"learning_rate": 1.5412547912043577e-05,
"loss": 0.0363,
"step": 3480
},
{
"epoch": 2.091072498502097,
"grad_norm": 1.8173693418502808,
"learning_rate": 1.5311680451886222e-05,
"loss": 0.0425,
"step": 3490
},
{
"epoch": 2.097064110245656,
"grad_norm": 2.4072582721710205,
"learning_rate": 1.5210812991728868e-05,
"loss": 0.0313,
"step": 3500
},
{
"epoch": 2.103055721989215,
"grad_norm": 1.7706407308578491,
"learning_rate": 1.5109945531571515e-05,
"loss": 0.0436,
"step": 3510
},
{
"epoch": 2.109047333732774,
"grad_norm": 1.4706944227218628,
"learning_rate": 1.5009078071414162e-05,
"loss": 0.0355,
"step": 3520
},
{
"epoch": 2.115038945476333,
"grad_norm": 1.9873573780059814,
"learning_rate": 1.490821061125681e-05,
"loss": 0.0436,
"step": 3530
},
{
"epoch": 2.121030557219892,
"grad_norm": 3.540144920349121,
"learning_rate": 1.4807343151099457e-05,
"loss": 0.0403,
"step": 3540
},
{
"epoch": 2.127022168963451,
"grad_norm": 1.8939119577407837,
"learning_rate": 1.4706475690942104e-05,
"loss": 0.035,
"step": 3550
},
{
"epoch": 2.13301378070701,
"grad_norm": 1.3639899492263794,
"learning_rate": 1.4605608230784751e-05,
"loss": 0.0402,
"step": 3560
},
{
"epoch": 2.139005392450569,
"grad_norm": 1.7946410179138184,
"learning_rate": 1.4504740770627395e-05,
"loss": 0.0292,
"step": 3570
},
{
"epoch": 2.144997004194128,
"grad_norm": 2.206691265106201,
"learning_rate": 1.4403873310470043e-05,
"loss": 0.0381,
"step": 3580
},
{
"epoch": 2.1509886159376874,
"grad_norm": 1.7045485973358154,
"learning_rate": 1.430300585031269e-05,
"loss": 0.0325,
"step": 3590
},
{
"epoch": 2.156980227681246,
"grad_norm": 1.582770586013794,
"learning_rate": 1.4202138390155337e-05,
"loss": 0.0286,
"step": 3600
},
{
"epoch": 2.1629718394248054,
"grad_norm": 2.6559298038482666,
"learning_rate": 1.4101270929997984e-05,
"loss": 0.0388,
"step": 3610
},
{
"epoch": 2.168963451168364,
"grad_norm": 2.094505786895752,
"learning_rate": 1.4000403469840632e-05,
"loss": 0.0347,
"step": 3620
},
{
"epoch": 2.1749550629119234,
"grad_norm": 1.6940979957580566,
"learning_rate": 1.3899536009683275e-05,
"loss": 0.0472,
"step": 3630
},
{
"epoch": 2.180946674655482,
"grad_norm": 2.0886759757995605,
"learning_rate": 1.3798668549525923e-05,
"loss": 0.0333,
"step": 3640
},
{
"epoch": 2.1869382863990414,
"grad_norm": 1.226136565208435,
"learning_rate": 1.369780108936857e-05,
"loss": 0.0301,
"step": 3650
},
{
"epoch": 2.1929298981426,
"grad_norm": 1.9858462810516357,
"learning_rate": 1.3596933629211217e-05,
"loss": 0.0357,
"step": 3660
},
{
"epoch": 2.1989215098861594,
"grad_norm": 1.2248085737228394,
"learning_rate": 1.3496066169053864e-05,
"loss": 0.0282,
"step": 3670
},
{
"epoch": 2.204913121629718,
"grad_norm": 1.8398869037628174,
"learning_rate": 1.3395198708896512e-05,
"loss": 0.0376,
"step": 3680
},
{
"epoch": 2.2109047333732774,
"grad_norm": 2.07497501373291,
"learning_rate": 1.3294331248739159e-05,
"loss": 0.0435,
"step": 3690
},
{
"epoch": 2.2168963451168366,
"grad_norm": 1.5605896711349487,
"learning_rate": 1.3193463788581803e-05,
"loss": 0.0502,
"step": 3700
},
{
"epoch": 2.2228879568603954,
"grad_norm": 1.877122402191162,
"learning_rate": 1.309259632842445e-05,
"loss": 0.0326,
"step": 3710
},
{
"epoch": 2.2288795686039546,
"grad_norm": 1.2292389869689941,
"learning_rate": 1.2991728868267097e-05,
"loss": 0.0413,
"step": 3720
},
{
"epoch": 2.2348711803475134,
"grad_norm": 3.2773098945617676,
"learning_rate": 1.2890861408109744e-05,
"loss": 0.0345,
"step": 3730
},
{
"epoch": 2.2408627920910726,
"grad_norm": 1.8774651288986206,
"learning_rate": 1.2789993947952392e-05,
"loss": 0.0361,
"step": 3740
},
{
"epoch": 2.2468544038346314,
"grad_norm": 2.3334178924560547,
"learning_rate": 1.2689126487795039e-05,
"loss": 0.0275,
"step": 3750
},
{
"epoch": 2.2528460155781906,
"grad_norm": 2.0709402561187744,
"learning_rate": 1.2588259027637686e-05,
"loss": 0.0474,
"step": 3760
},
{
"epoch": 2.2588376273217494,
"grad_norm": 1.9124011993408203,
"learning_rate": 1.2487391567480332e-05,
"loss": 0.0331,
"step": 3770
},
{
"epoch": 2.2648292390653086,
"grad_norm": 1.548407793045044,
"learning_rate": 1.2386524107322977e-05,
"loss": 0.0325,
"step": 3780
},
{
"epoch": 2.2708208508088674,
"grad_norm": 1.7963712215423584,
"learning_rate": 1.2285656647165625e-05,
"loss": 0.0388,
"step": 3790
},
{
"epoch": 2.2768124625524266,
"grad_norm": 1.190773606300354,
"learning_rate": 1.2184789187008272e-05,
"loss": 0.034,
"step": 3800
},
{
"epoch": 2.282804074295986,
"grad_norm": 2.5754892826080322,
"learning_rate": 1.2083921726850919e-05,
"loss": 0.0351,
"step": 3810
},
{
"epoch": 2.2887956860395446,
"grad_norm": 1.9364176988601685,
"learning_rate": 1.1983054266693565e-05,
"loss": 0.0353,
"step": 3820
},
{
"epoch": 2.2947872977831034,
"grad_norm": 2.3457119464874268,
"learning_rate": 1.1882186806536212e-05,
"loss": 0.0322,
"step": 3830
},
{
"epoch": 2.3007789095266626,
"grad_norm": 2.6304166316986084,
"learning_rate": 1.1781319346378859e-05,
"loss": 0.0396,
"step": 3840
},
{
"epoch": 2.306770521270222,
"grad_norm": 2.0976033210754395,
"learning_rate": 1.1680451886221505e-05,
"loss": 0.0313,
"step": 3850
},
{
"epoch": 2.3127621330137806,
"grad_norm": 1.447740912437439,
"learning_rate": 1.1579584426064152e-05,
"loss": 0.0307,
"step": 3860
},
{
"epoch": 2.31875374475734,
"grad_norm": 1.0516040325164795,
"learning_rate": 1.1478716965906799e-05,
"loss": 0.0298,
"step": 3870
},
{
"epoch": 2.3247453565008986,
"grad_norm": 1.3127169609069824,
"learning_rate": 1.1377849505749445e-05,
"loss": 0.0368,
"step": 3880
},
{
"epoch": 2.330736968244458,
"grad_norm": 1.0407856702804565,
"learning_rate": 1.1276982045592092e-05,
"loss": 0.0379,
"step": 3890
},
{
"epoch": 2.3367285799880166,
"grad_norm": 2.1066038608551025,
"learning_rate": 1.1176114585434739e-05,
"loss": 0.0388,
"step": 3900
},
{
"epoch": 2.342720191731576,
"grad_norm": 2.169619560241699,
"learning_rate": 1.1075247125277385e-05,
"loss": 0.0339,
"step": 3910
},
{
"epoch": 2.3487118034751346,
"grad_norm": 1.432060956954956,
"learning_rate": 1.0974379665120032e-05,
"loss": 0.0393,
"step": 3920
},
{
"epoch": 2.354703415218694,
"grad_norm": 2.5926220417022705,
"learning_rate": 1.087351220496268e-05,
"loss": 0.042,
"step": 3930
},
{
"epoch": 2.3606950269622526,
"grad_norm": 1.5448261499404907,
"learning_rate": 1.0772644744805326e-05,
"loss": 0.0364,
"step": 3940
},
{
"epoch": 2.366686638705812,
"grad_norm": 1.302937388420105,
"learning_rate": 1.0671777284647972e-05,
"loss": 0.0408,
"step": 3950
},
{
"epoch": 2.372678250449371,
"grad_norm": 1.1434581279754639,
"learning_rate": 1.057090982449062e-05,
"loss": 0.0334,
"step": 3960
},
{
"epoch": 2.37866986219293,
"grad_norm": 1.498061180114746,
"learning_rate": 1.0470042364333266e-05,
"loss": 0.0356,
"step": 3970
},
{
"epoch": 2.384661473936489,
"grad_norm": 2.3065295219421387,
"learning_rate": 1.0369174904175914e-05,
"loss": 0.0395,
"step": 3980
},
{
"epoch": 2.390653085680048,
"grad_norm": 2.0692338943481445,
"learning_rate": 1.026830744401856e-05,
"loss": 0.0313,
"step": 3990
},
{
"epoch": 2.396644697423607,
"grad_norm": 2.2149441242218018,
"learning_rate": 1.0167439983861206e-05,
"loss": 0.0357,
"step": 4000
},
{
"epoch": 2.396644697423607,
"eval_loss": 0.7519157528877258,
"eval_runtime": 689.0466,
"eval_samples_per_second": 2.058,
"eval_steps_per_second": 0.129,
"eval_wer": 0.45348637998972074,
"step": 4000
},
{
"epoch": 2.402636309167166,
"grad_norm": 1.8555347919464111,
"learning_rate": 1.0066572523703854e-05,
"loss": 0.0276,
"step": 4010
},
{
"epoch": 2.408627920910725,
"grad_norm": 0.9930511116981506,
"learning_rate": 9.965705063546501e-06,
"loss": 0.0313,
"step": 4020
},
{
"epoch": 2.414619532654284,
"grad_norm": 1.6992213726043701,
"learning_rate": 9.864837603389147e-06,
"loss": 0.0314,
"step": 4030
},
{
"epoch": 2.420611144397843,
"grad_norm": 1.9412932395935059,
"learning_rate": 9.763970143231794e-06,
"loss": 0.0328,
"step": 4040
},
{
"epoch": 2.426602756141402,
"grad_norm": 2.8163645267486572,
"learning_rate": 9.663102683074441e-06,
"loss": 0.0401,
"step": 4050
},
{
"epoch": 2.432594367884961,
"grad_norm": 2.6094470024108887,
"learning_rate": 9.562235222917088e-06,
"loss": 0.034,
"step": 4060
},
{
"epoch": 2.4385859796285203,
"grad_norm": 1.896148443222046,
"learning_rate": 9.461367762759736e-06,
"loss": 0.0333,
"step": 4070
},
{
"epoch": 2.444577591372079,
"grad_norm": 2.1546242237091064,
"learning_rate": 9.360500302602381e-06,
"loss": 0.0388,
"step": 4080
},
{
"epoch": 2.450569203115638,
"grad_norm": 1.583756446838379,
"learning_rate": 9.259632842445028e-06,
"loss": 0.0279,
"step": 4090
},
{
"epoch": 2.456560814859197,
"grad_norm": 1.8957688808441162,
"learning_rate": 9.158765382287676e-06,
"loss": 0.0347,
"step": 4100
},
{
"epoch": 2.4625524266027563,
"grad_norm": 1.2544060945510864,
"learning_rate": 9.057897922130321e-06,
"loss": 0.0384,
"step": 4110
},
{
"epoch": 2.468544038346315,
"grad_norm": 1.9352102279663086,
"learning_rate": 8.957030461972968e-06,
"loss": 0.0422,
"step": 4120
},
{
"epoch": 2.4745356500898743,
"grad_norm": 1.348984956741333,
"learning_rate": 8.856163001815616e-06,
"loss": 0.029,
"step": 4130
},
{
"epoch": 2.480527261833433,
"grad_norm": 1.6167041063308716,
"learning_rate": 8.755295541658263e-06,
"loss": 0.0311,
"step": 4140
},
{
"epoch": 2.4865188735769923,
"grad_norm": 2.604578971862793,
"learning_rate": 8.654428081500908e-06,
"loss": 0.0362,
"step": 4150
},
{
"epoch": 2.492510485320551,
"grad_norm": 1.353280782699585,
"learning_rate": 8.553560621343556e-06,
"loss": 0.0323,
"step": 4160
},
{
"epoch": 2.4985020970641103,
"grad_norm": 1.6819239854812622,
"learning_rate": 8.452693161186203e-06,
"loss": 0.0356,
"step": 4170
},
{
"epoch": 2.5044937088076695,
"grad_norm": 1.5543423891067505,
"learning_rate": 8.351825701028848e-06,
"loss": 0.0283,
"step": 4180
},
{
"epoch": 2.5104853205512283,
"grad_norm": 1.8067575693130493,
"learning_rate": 8.250958240871496e-06,
"loss": 0.0251,
"step": 4190
},
{
"epoch": 2.516476932294787,
"grad_norm": 1.6408029794692993,
"learning_rate": 8.150090780714143e-06,
"loss": 0.032,
"step": 4200
},
{
"epoch": 2.5224685440383463,
"grad_norm": 2.8149192333221436,
"learning_rate": 8.049223320556788e-06,
"loss": 0.0356,
"step": 4210
},
{
"epoch": 2.5284601557819055,
"grad_norm": 2.3816680908203125,
"learning_rate": 7.948355860399436e-06,
"loss": 0.0508,
"step": 4220
},
{
"epoch": 2.5344517675254643,
"grad_norm": 0.8424966335296631,
"learning_rate": 7.847488400242083e-06,
"loss": 0.0319,
"step": 4230
},
{
"epoch": 2.5404433792690235,
"grad_norm": 2.407496213912964,
"learning_rate": 7.746620940084729e-06,
"loss": 0.0437,
"step": 4240
},
{
"epoch": 2.5464349910125823,
"grad_norm": 1.2954119443893433,
"learning_rate": 7.645753479927376e-06,
"loss": 0.0294,
"step": 4250
},
{
"epoch": 2.5524266027561415,
"grad_norm": 2.2885632514953613,
"learning_rate": 7.544886019770023e-06,
"loss": 0.0293,
"step": 4260
},
{
"epoch": 2.5584182144997003,
"grad_norm": 1.5569554567337036,
"learning_rate": 7.44401855961267e-06,
"loss": 0.0306,
"step": 4270
},
{
"epoch": 2.5644098262432595,
"grad_norm": 1.3652706146240234,
"learning_rate": 7.343151099455316e-06,
"loss": 0.0364,
"step": 4280
},
{
"epoch": 2.5704014379868183,
"grad_norm": 1.461018443107605,
"learning_rate": 7.242283639297963e-06,
"loss": 0.0321,
"step": 4290
},
{
"epoch": 2.5763930497303775,
"grad_norm": 1.4211386442184448,
"learning_rate": 7.14141617914061e-06,
"loss": 0.0278,
"step": 4300
},
{
"epoch": 2.5823846614739363,
"grad_norm": 1.122140645980835,
"learning_rate": 7.040548718983256e-06,
"loss": 0.0257,
"step": 4310
},
{
"epoch": 2.5883762732174955,
"grad_norm": 2.3963217735290527,
"learning_rate": 6.939681258825903e-06,
"loss": 0.0406,
"step": 4320
},
{
"epoch": 2.5943678849610547,
"grad_norm": 2.619145154953003,
"learning_rate": 6.83881379866855e-06,
"loss": 0.0356,
"step": 4330
},
{
"epoch": 2.6003594967046135,
"grad_norm": 2.836984872817993,
"learning_rate": 6.737946338511196e-06,
"loss": 0.0332,
"step": 4340
},
{
"epoch": 2.6063511084481723,
"grad_norm": 1.085598349571228,
"learning_rate": 6.637078878353843e-06,
"loss": 0.0326,
"step": 4350
},
{
"epoch": 2.6123427201917315,
"grad_norm": 1.6264371871948242,
"learning_rate": 6.53621141819649e-06,
"loss": 0.034,
"step": 4360
},
{
"epoch": 2.6183343319352907,
"grad_norm": 2.668785333633423,
"learning_rate": 6.435343958039138e-06,
"loss": 0.0346,
"step": 4370
},
{
"epoch": 2.6243259436788495,
"grad_norm": 1.0972654819488525,
"learning_rate": 6.3445632438975195e-06,
"loss": 0.0277,
"step": 4380
},
{
"epoch": 2.6303175554224087,
"grad_norm": 2.0440616607666016,
"learning_rate": 6.243695783740166e-06,
"loss": 0.0345,
"step": 4390
},
{
"epoch": 2.6363091671659675,
"grad_norm": 1.2406848669052124,
"learning_rate": 6.142828323582812e-06,
"loss": 0.0255,
"step": 4400
},
{
"epoch": 2.6423007789095267,
"grad_norm": 2.209808111190796,
"learning_rate": 6.0419608634254595e-06,
"loss": 0.0344,
"step": 4410
},
{
"epoch": 2.6482923906530855,
"grad_norm": 1.178346037864685,
"learning_rate": 5.941093403268106e-06,
"loss": 0.034,
"step": 4420
},
{
"epoch": 2.6542840023966447,
"grad_norm": 1.8238011598587036,
"learning_rate": 5.840225943110752e-06,
"loss": 0.0334,
"step": 4430
},
{
"epoch": 2.660275614140204,
"grad_norm": 1.1327911615371704,
"learning_rate": 5.7393584829533995e-06,
"loss": 0.0332,
"step": 4440
},
{
"epoch": 2.6662672258837627,
"grad_norm": 1.3287112712860107,
"learning_rate": 5.638491022796046e-06,
"loss": 0.0341,
"step": 4450
},
{
"epoch": 2.6722588376273215,
"grad_norm": 1.4079091548919678,
"learning_rate": 5.537623562638692e-06,
"loss": 0.0295,
"step": 4460
},
{
"epoch": 2.6782504493708807,
"grad_norm": 1.7478376626968384,
"learning_rate": 5.43675610248134e-06,
"loss": 0.0289,
"step": 4470
},
{
"epoch": 2.68424206111444,
"grad_norm": 1.264457106590271,
"learning_rate": 5.335888642323986e-06,
"loss": 0.0269,
"step": 4480
},
{
"epoch": 2.6902336728579987,
"grad_norm": 1.1999608278274536,
"learning_rate": 5.235021182166633e-06,
"loss": 0.0341,
"step": 4490
},
{
"epoch": 2.696225284601558,
"grad_norm": 2.0491466522216797,
"learning_rate": 5.13415372200928e-06,
"loss": 0.0383,
"step": 4500
},
{
"epoch": 2.7022168963451167,
"grad_norm": 1.9697667360305786,
"learning_rate": 5.033286261851927e-06,
"loss": 0.0347,
"step": 4510
},
{
"epoch": 2.708208508088676,
"grad_norm": 1.3339002132415771,
"learning_rate": 4.932418801694573e-06,
"loss": 0.0339,
"step": 4520
},
{
"epoch": 2.7142001198322347,
"grad_norm": 2.346050500869751,
"learning_rate": 4.8315513415372205e-06,
"loss": 0.0317,
"step": 4530
},
{
"epoch": 2.720191731575794,
"grad_norm": 1.9801725149154663,
"learning_rate": 4.730683881379868e-06,
"loss": 0.034,
"step": 4540
},
{
"epoch": 2.7261833433193527,
"grad_norm": 1.5184906721115112,
"learning_rate": 4.629816421222514e-06,
"loss": 0.0294,
"step": 4550
},
{
"epoch": 2.732174955062912,
"grad_norm": 1.131451964378357,
"learning_rate": 4.5289489610651606e-06,
"loss": 0.0345,
"step": 4560
},
{
"epoch": 2.7381665668064707,
"grad_norm": 1.6129716634750366,
"learning_rate": 4.428081500907808e-06,
"loss": 0.0307,
"step": 4570
},
{
"epoch": 2.74415817855003,
"grad_norm": 1.9320335388183594,
"learning_rate": 4.327214040750454e-06,
"loss": 0.03,
"step": 4580
},
{
"epoch": 2.750149790293589,
"grad_norm": 1.6482346057891846,
"learning_rate": 4.2263465805931014e-06,
"loss": 0.0293,
"step": 4590
},
{
"epoch": 2.756141402037148,
"grad_norm": 2.033869504928589,
"learning_rate": 4.125479120435748e-06,
"loss": 0.0157,
"step": 4600
},
{
"epoch": 2.7621330137807067,
"grad_norm": 1.0993421077728271,
"learning_rate": 4.024611660278394e-06,
"loss": 0.027,
"step": 4610
},
{
"epoch": 2.768124625524266,
"grad_norm": 1.4427032470703125,
"learning_rate": 3.9237442001210415e-06,
"loss": 0.0315,
"step": 4620
},
{
"epoch": 2.774116237267825,
"grad_norm": 1.7813125848770142,
"learning_rate": 3.822876739963688e-06,
"loss": 0.0311,
"step": 4630
},
{
"epoch": 2.780107849011384,
"grad_norm": 1.579520583152771,
"learning_rate": 3.722009279806335e-06,
"loss": 0.0254,
"step": 4640
},
{
"epoch": 2.786099460754943,
"grad_norm": 1.8535341024398804,
"learning_rate": 3.6211418196489815e-06,
"loss": 0.0379,
"step": 4650
},
{
"epoch": 2.792091072498502,
"grad_norm": 1.1260581016540527,
"learning_rate": 3.520274359491628e-06,
"loss": 0.0292,
"step": 4660
},
{
"epoch": 2.798082684242061,
"grad_norm": 1.3263887166976929,
"learning_rate": 3.419406899334275e-06,
"loss": 0.0332,
"step": 4670
},
{
"epoch": 2.80407429598562,
"grad_norm": 1.6725775003433228,
"learning_rate": 3.3185394391769216e-06,
"loss": 0.0324,
"step": 4680
},
{
"epoch": 2.810065907729179,
"grad_norm": 1.6184289455413818,
"learning_rate": 3.217671979019569e-06,
"loss": 0.0201,
"step": 4690
},
{
"epoch": 2.8160575194727384,
"grad_norm": 1.532372236251831,
"learning_rate": 3.116804518862215e-06,
"loss": 0.0311,
"step": 4700
},
{
"epoch": 2.822049131216297,
"grad_norm": 1.2122362852096558,
"learning_rate": 3.015937058704862e-06,
"loss": 0.0264,
"step": 4710
},
{
"epoch": 2.828040742959856,
"grad_norm": 2.1291024684906006,
"learning_rate": 2.9150695985475084e-06,
"loss": 0.0279,
"step": 4720
},
{
"epoch": 2.834032354703415,
"grad_norm": 1.5369389057159424,
"learning_rate": 2.8142021383901552e-06,
"loss": 0.0311,
"step": 4730
},
{
"epoch": 2.8400239664469744,
"grad_norm": 1.5334845781326294,
"learning_rate": 2.713334678232802e-06,
"loss": 0.032,
"step": 4740
},
{
"epoch": 2.846015578190533,
"grad_norm": 0.8564344048500061,
"learning_rate": 2.612467218075449e-06,
"loss": 0.0242,
"step": 4750
},
{
"epoch": 2.8520071899340924,
"grad_norm": 1.7857556343078613,
"learning_rate": 2.5115997579180957e-06,
"loss": 0.0398,
"step": 4760
},
{
"epoch": 2.857998801677651,
"grad_norm": 1.5189533233642578,
"learning_rate": 2.4107322977607425e-06,
"loss": 0.0376,
"step": 4770
},
{
"epoch": 2.8639904134212104,
"grad_norm": 1.8716094493865967,
"learning_rate": 2.3098648376033894e-06,
"loss": 0.022,
"step": 4780
},
{
"epoch": 2.869982025164769,
"grad_norm": 1.2873154878616333,
"learning_rate": 2.208997377446036e-06,
"loss": 0.0252,
"step": 4790
},
{
"epoch": 2.8759736369083284,
"grad_norm": 1.3582782745361328,
"learning_rate": 2.108129917288683e-06,
"loss": 0.0255,
"step": 4800
},
{
"epoch": 2.8819652486518876,
"grad_norm": 1.480502724647522,
"learning_rate": 2.00726245713133e-06,
"loss": 0.038,
"step": 4810
},
{
"epoch": 2.8879568603954464,
"grad_norm": 2.472153902053833,
"learning_rate": 1.9063949969739762e-06,
"loss": 0.04,
"step": 4820
},
{
"epoch": 2.893948472139005,
"grad_norm": 2.1943459510803223,
"learning_rate": 1.805527536816623e-06,
"loss": 0.029,
"step": 4830
},
{
"epoch": 2.8999400838825644,
"grad_norm": 0.9275830388069153,
"learning_rate": 1.7046600766592699e-06,
"loss": 0.0283,
"step": 4840
},
{
"epoch": 2.9059316956261236,
"grad_norm": 1.082454800605774,
"learning_rate": 1.6037926165019167e-06,
"loss": 0.0301,
"step": 4850
},
{
"epoch": 2.9119233073696824,
"grad_norm": 1.5443400144577026,
"learning_rate": 1.5029251563445633e-06,
"loss": 0.0257,
"step": 4860
},
{
"epoch": 2.917914919113241,
"grad_norm": 2.1504111289978027,
"learning_rate": 1.40205769618721e-06,
"loss": 0.0294,
"step": 4870
},
{
"epoch": 2.9239065308568004,
"grad_norm": 2.696934223175049,
"learning_rate": 1.3011902360298567e-06,
"loss": 0.0324,
"step": 4880
},
{
"epoch": 2.9298981426003596,
"grad_norm": 1.6266438961029053,
"learning_rate": 1.2003227758725035e-06,
"loss": 0.0253,
"step": 4890
},
{
"epoch": 2.9358897543439184,
"grad_norm": 1.217948317527771,
"learning_rate": 1.0994553157151504e-06,
"loss": 0.0351,
"step": 4900
},
{
"epoch": 2.9418813660874776,
"grad_norm": 1.121309518814087,
"learning_rate": 9.985878555577972e-07,
"loss": 0.028,
"step": 4910
},
{
"epoch": 2.9478729778310364,
"grad_norm": 2.2428746223449707,
"learning_rate": 8.977203954004438e-07,
"loss": 0.0375,
"step": 4920
},
{
"epoch": 2.9538645895745956,
"grad_norm": 1.1667921543121338,
"learning_rate": 7.968529352430906e-07,
"loss": 0.0207,
"step": 4930
},
{
"epoch": 2.9598562013181544,
"grad_norm": 1.7099730968475342,
"learning_rate": 6.959854750857373e-07,
"loss": 0.0331,
"step": 4940
},
{
"epoch": 2.9658478130617136,
"grad_norm": 1.3251785039901733,
"learning_rate": 5.951180149283842e-07,
"loss": 0.0305,
"step": 4950
},
{
"epoch": 2.971839424805273,
"grad_norm": 0.9592264890670776,
"learning_rate": 4.942505547710309e-07,
"loss": 0.0225,
"step": 4960
},
{
"epoch": 2.9778310365488316,
"grad_norm": 1.1208076477050781,
"learning_rate": 3.933830946136777e-07,
"loss": 0.0253,
"step": 4970
},
{
"epoch": 2.9838226482923904,
"grad_norm": 2.31288743019104,
"learning_rate": 2.925156344563244e-07,
"loss": 0.0379,
"step": 4980
},
{
"epoch": 2.9898142600359496,
"grad_norm": 1.670334815979004,
"learning_rate": 1.9164817429897115e-07,
"loss": 0.0353,
"step": 4990
},
{
"epoch": 2.995805871779509,
"grad_norm": 1.1539229154586792,
"learning_rate": 9.078071414161792e-08,
"loss": 0.0363,
"step": 5000
},
{
"epoch": 2.995805871779509,
"eval_loss": 0.7503044009208679,
"eval_runtime": 686.3834,
"eval_samples_per_second": 2.066,
"eval_steps_per_second": 0.13,
"eval_wer": 0.44463480098223973,
"step": 5000
}
],
"logging_steps": 10,
"max_steps": 5007,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 5.1887996928e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}