|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.001018848700968,
  "eval_steps": 500,
  "global_step": 982,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0020376974019358125, "grad_norm": 5.835855484008789, "learning_rate": 3.3783783783783786e-08, "loss": 0.3773, "step": 1 },
    { "epoch": 0.004075394803871625, "grad_norm": 6.126637935638428, "learning_rate": 6.756756756756757e-08, "loss": 0.359, "step": 2 },
    { "epoch": 0.006113092205807438, "grad_norm": 6.190367698669434, "learning_rate": 1.0135135135135137e-07, "loss": 0.3575, "step": 3 },
    { "epoch": 0.00815078960774325, "grad_norm": 6.040923595428467, "learning_rate": 1.3513513513513515e-07, "loss": 0.3513, "step": 4 },
    { "epoch": 0.010188487009679063, "grad_norm": 5.735762596130371, "learning_rate": 1.6891891891891894e-07, "loss": 0.3365, "step": 5 },
    { "epoch": 0.012226184411614875, "grad_norm": 4.77271842956543, "learning_rate": 2.0270270270270273e-07, "loss": 0.3494, "step": 6 },
    { "epoch": 0.014263881813550688, "grad_norm": 4.999124050140381, "learning_rate": 2.3648648648648652e-07, "loss": 0.3416, "step": 7 },
    { "epoch": 0.0163015792154865, "grad_norm": 4.210795879364014, "learning_rate": 2.702702702702703e-07, "loss": 0.3138, "step": 8 },
    { "epoch": 0.018339276617422313, "grad_norm": 2.9208009243011475, "learning_rate": 3.040540540540541e-07, "loss": 0.2978, "step": 9 },
    { "epoch": 0.020376974019358125, "grad_norm": 2.9749715328216553, "learning_rate": 3.378378378378379e-07, "loss": 0.2941, "step": 10 },
    { "epoch": 0.022414671421293938, "grad_norm": 2.37031888961792, "learning_rate": 3.716216216216217e-07, "loss": 0.2859, "step": 11 },
    { "epoch": 0.02445236882322975, "grad_norm": 2.144174098968506, "learning_rate": 4.0540540540540546e-07, "loss": 0.2767, "step": 12 },
    { "epoch": 0.026490066225165563, "grad_norm": 2.0079538822174072, "learning_rate": 4.3918918918918923e-07, "loss": 0.2718, "step": 13 },
    { "epoch": 0.028527763627101375, "grad_norm": 2.1613566875457764, "learning_rate": 4.7297297297297305e-07, "loss": 0.3009, "step": 14 },
    { "epoch": 0.030565461029037188, "grad_norm": 1.8342921733856201, "learning_rate": 5.067567567567568e-07, "loss": 0.2576, "step": 15 },
    { "epoch": 0.032603158430973, "grad_norm": 1.9354841709136963, "learning_rate": 5.405405405405406e-07, "loss": 0.2781, "step": 16 },
    { "epoch": 0.034640855832908816, "grad_norm": 1.7937740087509155, "learning_rate": 5.743243243243245e-07, "loss": 0.2851, "step": 17 },
    { "epoch": 0.036678553234844626, "grad_norm": 1.8419092893600464, "learning_rate": 6.081081081081082e-07, "loss": 0.2826, "step": 18 },
    { "epoch": 0.03871625063678044, "grad_norm": 1.814770221710205, "learning_rate": 6.418918918918919e-07, "loss": 0.2653, "step": 19 },
    { "epoch": 0.04075394803871625, "grad_norm": 1.7113629579544067, "learning_rate": 6.756756756756758e-07, "loss": 0.2695, "step": 20 },
    { "epoch": 0.04279164544065207, "grad_norm": 1.6104000806808472, "learning_rate": 7.094594594594595e-07, "loss": 0.2605, "step": 21 },
    { "epoch": 0.044829342842587876, "grad_norm": 1.8066229820251465, "learning_rate": 7.432432432432434e-07, "loss": 0.2654, "step": 22 },
    { "epoch": 0.04686704024452369, "grad_norm": 1.6007901430130005, "learning_rate": 7.770270270270271e-07, "loss": 0.2592, "step": 23 },
    { "epoch": 0.0489047376464595, "grad_norm": 1.728143334388733, "learning_rate": 8.108108108108109e-07, "loss": 0.27, "step": 24 },
    { "epoch": 0.05094243504839532, "grad_norm": 1.6227835416793823, "learning_rate": 8.445945945945947e-07, "loss": 0.2647, "step": 25 },
    { "epoch": 0.052980132450331126, "grad_norm": 1.5776833295822144, "learning_rate": 8.783783783783785e-07, "loss": 0.256, "step": 26 },
    { "epoch": 0.05501782985226694, "grad_norm": 1.5556491613388062, "learning_rate": 9.121621621621622e-07, "loss": 0.2611, "step": 27 },
    { "epoch": 0.05705552725420275, "grad_norm": 1.6114894151687622, "learning_rate": 9.459459459459461e-07, "loss": 0.2666, "step": 28 },
    { "epoch": 0.05909322465613857, "grad_norm": 1.508907675743103, "learning_rate": 9.797297297297298e-07, "loss": 0.2513, "step": 29 },
    { "epoch": 0.061130922058074376, "grad_norm": 1.6146596670150757, "learning_rate": 1.0135135135135136e-06, "loss": 0.2735, "step": 30 },
    { "epoch": 0.06316861946001019, "grad_norm": 1.691273808479309, "learning_rate": 1.0472972972972973e-06, "loss": 0.2637, "step": 31 },
    { "epoch": 0.065206316861946, "grad_norm": 1.5835543870925903, "learning_rate": 1.0810810810810812e-06, "loss": 0.2657, "step": 32 },
    { "epoch": 0.06724401426388181, "grad_norm": 1.5753782987594604, "learning_rate": 1.114864864864865e-06, "loss": 0.2477, "step": 33 },
    { "epoch": 0.06928171166581763, "grad_norm": 1.6279054880142212, "learning_rate": 1.148648648648649e-06, "loss": 0.2603, "step": 34 },
    { "epoch": 0.07131940906775344, "grad_norm": 1.789923906326294, "learning_rate": 1.1824324324324326e-06, "loss": 0.272, "step": 35 },
    { "epoch": 0.07335710646968925, "grad_norm": 1.9096171855926514, "learning_rate": 1.2162162162162164e-06, "loss": 0.2405, "step": 36 },
    { "epoch": 0.07539480387162506, "grad_norm": 1.6934822797775269, "learning_rate": 1.25e-06, "loss": 0.2649, "step": 37 },
    { "epoch": 0.07743250127356088, "grad_norm": 1.5827739238739014, "learning_rate": 1.2837837837837838e-06, "loss": 0.2433, "step": 38 },
    { "epoch": 0.07947019867549669, "grad_norm": 1.7752106189727783, "learning_rate": 1.3175675675675676e-06, "loss": 0.2492, "step": 39 },
    { "epoch": 0.0815078960774325, "grad_norm": 1.5755575895309448, "learning_rate": 1.3513513513513515e-06, "loss": 0.2483, "step": 40 },
    { "epoch": 0.08354559347936831, "grad_norm": 1.5823661088943481, "learning_rate": 1.3851351351351352e-06, "loss": 0.2436, "step": 41 },
    { "epoch": 0.08558329088130413, "grad_norm": 1.4753117561340332, "learning_rate": 1.418918918918919e-06, "loss": 0.2312, "step": 42 },
    { "epoch": 0.08762098828323994, "grad_norm": 1.6108685731887817, "learning_rate": 1.4527027027027027e-06, "loss": 0.2503, "step": 43 },
    { "epoch": 0.08965868568517575, "grad_norm": 1.6036980152130127, "learning_rate": 1.4864864864864868e-06, "loss": 0.2409, "step": 44 },
    { "epoch": 0.09169638308711156, "grad_norm": 1.6441094875335693, "learning_rate": 1.5202702702702704e-06, "loss": 0.2486, "step": 45 },
    { "epoch": 0.09373408048904738, "grad_norm": 1.6840177774429321, "learning_rate": 1.5540540540540541e-06, "loss": 0.2631, "step": 46 },
    { "epoch": 0.09577177789098319, "grad_norm": 1.7543444633483887, "learning_rate": 1.5878378378378378e-06, "loss": 0.2632, "step": 47 },
    { "epoch": 0.097809475292919, "grad_norm": 1.5591580867767334, "learning_rate": 1.6216216216216219e-06, "loss": 0.2382, "step": 48 },
    { "epoch": 0.09984717269485481, "grad_norm": 1.5250903367996216, "learning_rate": 1.6554054054054055e-06, "loss": 0.2431, "step": 49 },
    { "epoch": 0.10188487009679063, "grad_norm": 1.5948268175125122, "learning_rate": 1.6891891891891894e-06, "loss": 0.2591, "step": 50 },
    { "epoch": 0.10392256749872644, "grad_norm": 2.0706777572631836, "learning_rate": 1.722972972972973e-06, "loss": 0.2536, "step": 51 },
    { "epoch": 0.10596026490066225, "grad_norm": 1.6050032377243042, "learning_rate": 1.756756756756757e-06, "loss": 0.2353, "step": 52 },
    { "epoch": 0.10799796230259806, "grad_norm": 1.572332739830017, "learning_rate": 1.7905405405405408e-06, "loss": 0.2586, "step": 53 },
    { "epoch": 0.11003565970453388, "grad_norm": 1.5016673803329468, "learning_rate": 1.8243243243243245e-06, "loss": 0.2506, "step": 54 },
    { "epoch": 0.11207335710646969, "grad_norm": 1.5977917909622192, "learning_rate": 1.8581081081081081e-06, "loss": 0.2377, "step": 55 },
    { "epoch": 0.1141110545084055, "grad_norm": 1.4931672811508179, "learning_rate": 1.8918918918918922e-06, "loss": 0.2373, "step": 56 },
    { "epoch": 0.11614875191034131, "grad_norm": 1.4913355112075806, "learning_rate": 1.925675675675676e-06, "loss": 0.243, "step": 57 },
    { "epoch": 0.11818644931227713, "grad_norm": 1.3302721977233887, "learning_rate": 1.9594594594594595e-06, "loss": 0.2256, "step": 58 },
    { "epoch": 0.12022414671421294, "grad_norm": 1.6652206182479858, "learning_rate": 1.9932432432432434e-06, "loss": 0.2444, "step": 59 },
    { "epoch": 0.12226184411614875, "grad_norm": 1.701822280883789, "learning_rate": 2.0270270270270273e-06, "loss": 0.2311, "step": 60 },
    { "epoch": 0.12429954151808456, "grad_norm": 2.096843957901001, "learning_rate": 2.060810810810811e-06, "loss": 0.2524, "step": 61 },
    { "epoch": 0.12633723892002038, "grad_norm": 1.7165014743804932, "learning_rate": 2.0945945945945946e-06, "loss": 0.2514, "step": 62 },
    { "epoch": 0.12837493632195618, "grad_norm": 1.5645636320114136, "learning_rate": 2.1283783783783785e-06, "loss": 0.2425, "step": 63 },
    { "epoch": 0.130412633723892, "grad_norm": 1.4007450342178345, "learning_rate": 2.1621621621621623e-06, "loss": 0.2489, "step": 64 },
    { "epoch": 0.13245033112582782, "grad_norm": 1.485996127128601, "learning_rate": 2.195945945945946e-06, "loss": 0.254, "step": 65 },
    { "epoch": 0.13448802852776362, "grad_norm": 1.4894458055496216, "learning_rate": 2.22972972972973e-06, "loss": 0.2502, "step": 66 },
    { "epoch": 0.13652572592969944, "grad_norm": 1.59180748462677, "learning_rate": 2.2635135135135135e-06, "loss": 0.2379, "step": 67 },
    { "epoch": 0.13856342333163527, "grad_norm": 1.5528825521469116, "learning_rate": 2.297297297297298e-06, "loss": 0.2316, "step": 68 },
    { "epoch": 0.14060112073357106, "grad_norm": 1.7990005016326904, "learning_rate": 2.3310810810810813e-06, "loss": 0.2553, "step": 69 },
    { "epoch": 0.14263881813550688, "grad_norm": 1.5510270595550537, "learning_rate": 2.364864864864865e-06, "loss": 0.243, "step": 70 },
    { "epoch": 0.14467651553744268, "grad_norm": 1.5694071054458618, "learning_rate": 2.3986486486486486e-06, "loss": 0.2471, "step": 71 },
    { "epoch": 0.1467142129393785, "grad_norm": 1.4359402656555176, "learning_rate": 2.432432432432433e-06, "loss": 0.2324, "step": 72 },
    { "epoch": 0.14875191034131433, "grad_norm": 1.372413158416748, "learning_rate": 2.4662162162162163e-06, "loss": 0.2455, "step": 73 },
    { "epoch": 0.15078960774325012, "grad_norm": 1.4357329607009888, "learning_rate": 2.5e-06, "loss": 0.2332, "step": 74 },
    { "epoch": 0.15282730514518594, "grad_norm": 1.4806591272354126, "learning_rate": 2.533783783783784e-06, "loss": 0.2367, "step": 75 },
    { "epoch": 0.15486500254712177, "grad_norm": 1.5909092426300049, "learning_rate": 2.5675675675675675e-06, "loss": 0.2351, "step": 76 },
    { "epoch": 0.15690269994905756, "grad_norm": 1.3923670053482056, "learning_rate": 2.601351351351352e-06, "loss": 0.2413, "step": 77 },
    { "epoch": 0.15894039735099338, "grad_norm": 1.4559671878814697, "learning_rate": 2.6351351351351353e-06, "loss": 0.2365, "step": 78 },
    { "epoch": 0.16097809475292918, "grad_norm": 1.5630332231521606, "learning_rate": 2.668918918918919e-06, "loss": 0.2431, "step": 79 },
    { "epoch": 0.163015792154865, "grad_norm": 1.783199429512024, "learning_rate": 2.702702702702703e-06, "loss": 0.2497, "step": 80 },
    { "epoch": 0.16505348955680083, "grad_norm": 1.385953664779663, "learning_rate": 2.7364864864864865e-06, "loss": 0.2456, "step": 81 },
    { "epoch": 0.16709118695873662, "grad_norm": 1.5928620100021362, "learning_rate": 2.7702702702702703e-06, "loss": 0.2356, "step": 82 },
    { "epoch": 0.16912888436067244, "grad_norm": 1.6051955223083496, "learning_rate": 2.8040540540540546e-06, "loss": 0.2443, "step": 83 },
    { "epoch": 0.17116658176260827, "grad_norm": 1.5298365354537964, "learning_rate": 2.837837837837838e-06, "loss": 0.2354, "step": 84 },
    { "epoch": 0.17320427916454406, "grad_norm": 1.4721895456314087, "learning_rate": 2.871621621621622e-06, "loss": 0.2271, "step": 85 },
    { "epoch": 0.17524197656647988, "grad_norm": 1.3692090511322021, "learning_rate": 2.9054054054054054e-06, "loss": 0.2416, "step": 86 },
    { "epoch": 0.17727967396841568, "grad_norm": 1.4523481130599976, "learning_rate": 2.9391891891891893e-06, "loss": 0.2271, "step": 87 },
    { "epoch": 0.1793173713703515, "grad_norm": 1.5602335929870605, "learning_rate": 2.9729729729729736e-06, "loss": 0.2498, "step": 88 },
    { "epoch": 0.18135506877228733, "grad_norm": 1.478890061378479, "learning_rate": 3.006756756756757e-06, "loss": 0.2373, "step": 89 },
    { "epoch": 0.18339276617422312, "grad_norm": 1.4697165489196777, "learning_rate": 3.040540540540541e-06, "loss": 0.2522, "step": 90 },
    { "epoch": 0.18543046357615894, "grad_norm": 1.5102362632751465, "learning_rate": 3.0743243243243248e-06, "loss": 0.2403, "step": 91 },
    { "epoch": 0.18746816097809477, "grad_norm": 1.3657342195510864, "learning_rate": 3.1081081081081082e-06, "loss": 0.2304, "step": 92 },
    { "epoch": 0.18950585838003056, "grad_norm": 1.5056520700454712, "learning_rate": 3.141891891891892e-06, "loss": 0.2372, "step": 93 },
    { "epoch": 0.19154355578196638, "grad_norm": 1.5441231727600098, "learning_rate": 3.1756756756756755e-06, "loss": 0.2456, "step": 94 },
    { "epoch": 0.19358125318390218, "grad_norm": 1.472184181213379, "learning_rate": 3.20945945945946e-06, "loss": 0.2416, "step": 95 },
    { "epoch": 0.195618950585838, "grad_norm": 1.4700815677642822, "learning_rate": 3.2432432432432437e-06, "loss": 0.2254, "step": 96 },
    { "epoch": 0.19765664798777383, "grad_norm": 1.2121925354003906, "learning_rate": 3.277027027027027e-06, "loss": 0.2343, "step": 97 },
    { "epoch": 0.19969434538970962, "grad_norm": 1.5309983491897583, "learning_rate": 3.310810810810811e-06, "loss": 0.233, "step": 98 },
    { "epoch": 0.20173204279164544, "grad_norm": 1.422556757926941, "learning_rate": 3.3445945945945953e-06, "loss": 0.254, "step": 99 },
    { "epoch": 0.20376974019358127, "grad_norm": 1.3337507247924805, "learning_rate": 3.3783783783783788e-06, "loss": 0.2443, "step": 100 },
    { "epoch": 0.20580743759551706, "grad_norm": 1.3088750839233398, "learning_rate": 3.4121621621621626e-06, "loss": 0.2456, "step": 101 },
    { "epoch": 0.20784513499745289, "grad_norm": 1.5006299018859863, "learning_rate": 3.445945945945946e-06, "loss": 0.2315, "step": 102 },
    { "epoch": 0.20988283239938868, "grad_norm": 1.3762035369873047, "learning_rate": 3.47972972972973e-06, "loss": 0.2524, "step": 103 },
    { "epoch": 0.2119205298013245, "grad_norm": 1.2947945594787598, "learning_rate": 3.513513513513514e-06, "loss": 0.2345, "step": 104 },
    { "epoch": 0.21395822720326033, "grad_norm": 1.3257865905761719, "learning_rate": 3.5472972972972973e-06, "loss": 0.2275, "step": 105 },
    { "epoch": 0.21599592460519612, "grad_norm": 1.3588801622390747, "learning_rate": 3.5810810810810816e-06, "loss": 0.2302, "step": 106 },
    { "epoch": 0.21803362200713194, "grad_norm": 1.3438785076141357, "learning_rate": 3.6148648648648655e-06, "loss": 0.2353, "step": 107 },
    { "epoch": 0.22007131940906777, "grad_norm": 1.415419340133667, "learning_rate": 3.648648648648649e-06, "loss": 0.2232, "step": 108 },
    { "epoch": 0.22210901681100356, "grad_norm": 1.2871397733688354, "learning_rate": 3.6824324324324328e-06, "loss": 0.2039, "step": 109 },
    { "epoch": 0.22414671421293939, "grad_norm": 1.4281516075134277, "learning_rate": 3.7162162162162162e-06, "loss": 0.2476, "step": 110 },
    { "epoch": 0.22618441161487518, "grad_norm": 1.3740307092666626, "learning_rate": 3.7500000000000005e-06, "loss": 0.2206, "step": 111 },
    { "epoch": 0.228222109016811, "grad_norm": 1.5596915483474731, "learning_rate": 3.7837837837837844e-06, "loss": 0.2337, "step": 112 },
    { "epoch": 0.23025980641874683, "grad_norm": 1.501287817955017, "learning_rate": 3.817567567567567e-06, "loss": 0.2348, "step": 113 },
    { "epoch": 0.23229750382068262, "grad_norm": 1.5411885976791382, "learning_rate": 3.851351351351352e-06, "loss": 0.2466, "step": 114 },
    { "epoch": 0.23433520122261844, "grad_norm": 1.5837326049804688, "learning_rate": 3.885135135135135e-06, "loss": 0.2329, "step": 115 },
    { "epoch": 0.23637289862455427, "grad_norm": 1.222288966178894, "learning_rate": 3.918918918918919e-06, "loss": 0.2324, "step": 116 },
    { "epoch": 0.23841059602649006, "grad_norm": 1.310904860496521, "learning_rate": 3.952702702702703e-06, "loss": 0.2338, "step": 117 },
    { "epoch": 0.24044829342842589, "grad_norm": 1.4169098138809204, "learning_rate": 3.986486486486487e-06, "loss": 0.2461, "step": 118 },
    { "epoch": 0.24248599083036168, "grad_norm": 1.6267861127853394, "learning_rate": 4.020270270270271e-06, "loss": 0.2506, "step": 119 },
    { "epoch": 0.2445236882322975, "grad_norm": 1.396310567855835, "learning_rate": 4.0540540540540545e-06, "loss": 0.2169, "step": 120 },
    { "epoch": 0.24656138563423333, "grad_norm": 1.3390737771987915, "learning_rate": 4.087837837837838e-06, "loss": 0.2468, "step": 121 },
    { "epoch": 0.24859908303616912, "grad_norm": 1.418508768081665, "learning_rate": 4.121621621621622e-06, "loss": 0.2592, "step": 122 },
    { "epoch": 0.25063678043810494, "grad_norm": 1.4217811822891235, "learning_rate": 4.155405405405405e-06, "loss": 0.2277, "step": 123 },
    { "epoch": 0.25267447784004077, "grad_norm": 1.351367473602295, "learning_rate": 4.189189189189189e-06, "loss": 0.2421, "step": 124 },
    { "epoch": 0.2547121752419766, "grad_norm": 1.4876950979232788, "learning_rate": 4.222972972972974e-06, "loss": 0.2503, "step": 125 },
    { "epoch": 0.25674987264391236, "grad_norm": 1.304235577583313, "learning_rate": 4.256756756756757e-06, "loss": 0.2376, "step": 126 },
    { "epoch": 0.2587875700458482, "grad_norm": 1.3415497541427612, "learning_rate": 4.290540540540541e-06, "loss": 0.2386, "step": 127 },
    { "epoch": 0.260825267447784, "grad_norm": 1.6345967054367065, "learning_rate": 4.324324324324325e-06, "loss": 0.251, "step": 128 },
    { "epoch": 0.2628629648497198, "grad_norm": 1.3598498106002808, "learning_rate": 4.3581081081081085e-06, "loss": 0.2568, "step": 129 },
    { "epoch": 0.26490066225165565, "grad_norm": 1.2988228797912598, "learning_rate": 4.391891891891892e-06, "loss": 0.2423, "step": 130 },
    { "epoch": 0.2669383596535914, "grad_norm": 1.4423168897628784, "learning_rate": 4.4256756756756754e-06, "loss": 0.2565, "step": 131 },
    { "epoch": 0.26897605705552724, "grad_norm": 1.4470850229263306, "learning_rate": 4.45945945945946e-06, "loss": 0.254, "step": 132 },
    { "epoch": 0.27101375445746306, "grad_norm": 1.3750495910644531, "learning_rate": 4.493243243243244e-06, "loss": 0.2438, "step": 133 },
    { "epoch": 0.2730514518593989, "grad_norm": 1.2969499826431274, "learning_rate": 4.527027027027027e-06, "loss": 0.2519, "step": 134 },
    { "epoch": 0.2750891492613347, "grad_norm": 1.3548568487167358, "learning_rate": 4.560810810810811e-06, "loss": 0.2407, "step": 135 },
    { "epoch": 0.27712684666327053, "grad_norm": 1.3551725149154663, "learning_rate": 4.594594594594596e-06, "loss": 0.2432, "step": 136 },
    { "epoch": 0.2791645440652063, "grad_norm": 1.3198033571243286, "learning_rate": 4.628378378378379e-06, "loss": 0.2349, "step": 137 },
    { "epoch": 0.2812022414671421, "grad_norm": 1.344118595123291, "learning_rate": 4.6621621621621625e-06, "loss": 0.2374, "step": 138 },
    { "epoch": 0.28323993886907795, "grad_norm": 1.4489444494247437, "learning_rate": 4.695945945945946e-06, "loss": 0.2452, "step": 139 },
    { "epoch": 0.28527763627101377, "grad_norm": 1.2641702890396118, "learning_rate": 4.72972972972973e-06, "loss": 0.2341, "step": 140 },
    { "epoch": 0.2873153336729496, "grad_norm": 1.227349042892456, "learning_rate": 4.763513513513514e-06, "loss": 0.2351, "step": 141 },
    { "epoch": 0.28935303107488536, "grad_norm": 1.237866759300232, "learning_rate": 4.797297297297297e-06, "loss": 0.2416, "step": 142 },
    { "epoch": 0.2913907284768212, "grad_norm": 1.341732144355774, "learning_rate": 4.831081081081082e-06, "loss": 0.2273, "step": 143 },
    { "epoch": 0.293428425878757, "grad_norm": 1.2197740077972412, "learning_rate": 4.864864864864866e-06, "loss": 0.2371, "step": 144 },
    { "epoch": 0.2954661232806928, "grad_norm": 1.3351991176605225, "learning_rate": 4.898648648648649e-06, "loss": 0.237, "step": 145 },
    { "epoch": 0.29750382068262865, "grad_norm": 1.364261507987976, "learning_rate": 4.932432432432433e-06, "loss": 0.244, "step": 146 },
    { "epoch": 0.2995415180845644, "grad_norm": 1.4714289903640747, "learning_rate": 4.9662162162162165e-06, "loss": 0.2566, "step": 147 },
    { "epoch": 0.30157921548650024, "grad_norm": 1.2321275472640991, "learning_rate": 5e-06, "loss": 0.2305, "step": 148 },
    { "epoch": 0.30361691288843606, "grad_norm": 1.257879614830017, "learning_rate": 4.99622641509434e-06, "loss": 0.2364, "step": 149 },
    { "epoch": 0.3056546102903719, "grad_norm": 1.3090922832489014, "learning_rate": 4.99245283018868e-06, "loss": 0.2322, "step": 150 },
    { "epoch": 0.3076923076923077, "grad_norm": 1.2299717664718628, "learning_rate": 4.988679245283019e-06, "loss": 0.2267, "step": 151 },
    { "epoch": 0.30973000509424353, "grad_norm": 1.141762375831604, "learning_rate": 4.984905660377358e-06, "loss": 0.2332, "step": 152 },
    { "epoch": 0.3117677024961793, "grad_norm": 1.4402216672897339, "learning_rate": 4.981132075471698e-06, "loss": 0.2479, "step": 153 },
    { "epoch": 0.3138053998981151, "grad_norm": 1.4099055528640747, "learning_rate": 4.977358490566038e-06, "loss": 0.2379, "step": 154 },
    { "epoch": 0.31584309730005095, "grad_norm": 1.2121434211730957, "learning_rate": 4.973584905660378e-06, "loss": 0.2328, "step": 155 },
    { "epoch": 0.31788079470198677, "grad_norm": 1.2919939756393433, "learning_rate": 4.969811320754717e-06, "loss": 0.242, "step": 156 },
    { "epoch": 0.3199184921039226, "grad_norm": 1.4471988677978516, "learning_rate": 4.966037735849057e-06, "loss": 0.2504, "step": 157 },
    { "epoch": 0.32195618950585836, "grad_norm": 2.552502393722534, "learning_rate": 4.962264150943397e-06, "loss": 0.2496, "step": 158 },
    { "epoch": 0.3239938869077942, "grad_norm": 1.2150771617889404, "learning_rate": 4.958490566037736e-06, "loss": 0.2385, "step": 159 },
    { "epoch": 0.32603158430973, "grad_norm": 1.3135796785354614, "learning_rate": 4.954716981132076e-06, "loss": 0.2355, "step": 160 },
    { "epoch": 0.3280692817116658, "grad_norm": 1.3980915546417236, "learning_rate": 4.950943396226415e-06, "loss": 0.2338, "step": 161 },
    { "epoch": 0.33010697911360165, "grad_norm": 1.3292484283447266, "learning_rate": 4.947169811320755e-06, "loss": 0.243, "step": 162 },
    { "epoch": 0.3321446765155374, "grad_norm": 1.5226550102233887, "learning_rate": 4.943396226415095e-06, "loss": 0.2486, "step": 163 },
    { "epoch": 0.33418237391747324, "grad_norm": 1.2037345170974731, "learning_rate": 4.939622641509435e-06, "loss": 0.2528, "step": 164 },
    { "epoch": 0.33622007131940906, "grad_norm": 1.3524994850158691, "learning_rate": 4.935849056603774e-06, "loss": 0.2739, "step": 165 },
    { "epoch": 0.3382577687213449, "grad_norm": 1.1955732107162476, "learning_rate": 4.932075471698114e-06, "loss": 0.2257, "step": 166 },
    { "epoch": 0.3402954661232807, "grad_norm": 1.273659348487854, "learning_rate": 4.928301886792453e-06, "loss": 0.2434, "step": 167 },
    { "epoch": 0.34233316352521653, "grad_norm": 1.365476369857788, "learning_rate": 4.924528301886793e-06, "loss": 0.2269, "step": 168 },
    { "epoch": 0.3443708609271523, "grad_norm": 1.3352711200714111, "learning_rate": 4.920754716981133e-06, "loss": 0.2413, "step": 169 },
    { "epoch": 0.3464085583290881, "grad_norm": 1.2405195236206055, "learning_rate": 4.916981132075473e-06, "loss": 0.2382, "step": 170 },
    { "epoch": 0.34844625573102395, "grad_norm": 1.4409878253936768, "learning_rate": 4.913207547169812e-06, "loss": 0.2379, "step": 171 },
    { "epoch": 0.35048395313295977, "grad_norm": 1.269126534461975, "learning_rate": 4.909433962264152e-06, "loss": 0.2366, "step": 172 },
    { "epoch": 0.3525216505348956, "grad_norm": 1.1738016605377197, "learning_rate": 4.905660377358491e-06, "loss": 0.2311, "step": 173 },
    { "epoch": 0.35455934793683136, "grad_norm": 1.3719390630722046, "learning_rate": 4.9018867924528306e-06, "loss": 0.2238, "step": 174 },
    { "epoch": 0.3565970453387672, "grad_norm": 1.301747441291809, "learning_rate": 4.8981132075471705e-06, "loss": 0.2376, "step": 175 },
    { "epoch": 0.358634742740703, "grad_norm": 1.2950748205184937, "learning_rate": 4.8943396226415095e-06, "loss": 0.2614, "step": 176 },
    { "epoch": 0.36067244014263883, "grad_norm": 1.3460197448730469, "learning_rate": 4.8905660377358495e-06, "loss": 0.247, "step": 177 },
    { "epoch": 0.36271013754457465, "grad_norm": 1.1415988206863403, "learning_rate": 4.886792452830189e-06, "loss": 0.236, "step": 178 },
    { "epoch": 0.3647478349465104, "grad_norm": 1.2343894243240356, "learning_rate": 4.8830188679245284e-06, "loss": 0.2338, "step": 179 },
    { "epoch": 0.36678553234844624, "grad_norm": 1.3122376203536987, "learning_rate": 4.879245283018868e-06, "loss": 0.2512, "step": 180 },
    { "epoch": 0.36882322975038206, "grad_norm": 1.1867709159851074, "learning_rate": 4.875471698113207e-06, "loss": 0.2319, "step": 181 },
    { "epoch": 0.3708609271523179, "grad_norm": 1.242540955543518, "learning_rate": 4.871698113207547e-06, "loss": 0.2391, "step": 182 },
    { "epoch": 0.3728986245542537, "grad_norm": 1.277694821357727, "learning_rate": 4.867924528301887e-06, "loss": 0.2334, "step": 183 },
    { "epoch": 0.37493632195618953, "grad_norm": 1.341858983039856, "learning_rate": 4.864150943396227e-06, "loss": 0.2363, "step": 184 },
    { "epoch": 0.3769740193581253, "grad_norm": 1.2502440214157104, "learning_rate": 4.860377358490567e-06, "loss": 0.2342, "step": 185 },
    { "epoch": 0.3790117167600611, "grad_norm": 1.2432913780212402, "learning_rate": 4.856603773584906e-06, "loss": 0.2436, "step": 186 },
    { "epoch": 0.38104941416199695, "grad_norm": 1.3028502464294434, "learning_rate": 4.852830188679245e-06, "loss": 0.2367, "step": 187 },
    { "epoch": 0.38308711156393277, "grad_norm": 1.3056414127349854, "learning_rate": 4.849056603773585e-06, "loss": 0.2221, "step": 188 },
    { "epoch": 0.3851248089658686, "grad_norm": 1.1587262153625488, "learning_rate": 4.845283018867925e-06, "loss": 0.2511, "step": 189 },
    { "epoch": 0.38716250636780436, "grad_norm": 1.3277629613876343, "learning_rate": 4.841509433962265e-06, "loss": 0.2376, "step": 190 },
    { "epoch": 0.3892002037697402, "grad_norm": 1.3022247552871704, "learning_rate": 4.837735849056604e-06, "loss": 0.2503, "step": 191 },
    { "epoch": 0.391237901171676, "grad_norm": 1.1903053522109985, "learning_rate": 4.833962264150944e-06, "loss": 0.2376, "step": 192 },
    { "epoch": 0.39327559857361183, "grad_norm": 1.3128589391708374, "learning_rate": 4.830188679245284e-06, "loss": 0.2393, "step": 193 },
    { "epoch": 0.39531329597554765, "grad_norm": 2.1321053504943848, "learning_rate": 4.826415094339623e-06, "loss": 0.2214, "step": 194 },
    { "epoch": 0.3973509933774834, "grad_norm": 1.255610466003418, "learning_rate": 4.822641509433963e-06, "loss": 0.2377, "step": 195 },
    { "epoch": 0.39938869077941924, "grad_norm": 1.1986833810806274, "learning_rate": 4.818867924528302e-06, "loss": 0.2285, "step": 196 },
    { "epoch": 0.40142638818135506, "grad_norm": 1.2554630041122437, "learning_rate": 4.815094339622642e-06, "loss": 0.2491, "step": 197 },
    { "epoch": 0.4034640855832909, "grad_norm": 1.2297279834747314, "learning_rate": 4.811320754716982e-06, "loss": 0.2301, "step": 198 },
    { "epoch": 0.4055017829852267, "grad_norm": 1.3254568576812744, "learning_rate": 4.807547169811322e-06, "loss": 0.2326, "step": 199 },
    { "epoch": 0.40753948038716253, "grad_norm": 1.2830324172973633, "learning_rate": 4.803773584905661e-06, "loss": 0.2466, "step": 200 },
    { "epoch": 0.4095771777890983, "grad_norm": 1.3986701965332031, "learning_rate": 4.800000000000001e-06, "loss": 0.2632, "step": 201 },
    { "epoch": 0.4116148751910341, "grad_norm": 1.3166711330413818, "learning_rate": 4.79622641509434e-06, "loss": 0.2294, "step": 202 },
    { "epoch": 0.41365257259296995, "grad_norm": 1.4140809774398804, "learning_rate": 4.79245283018868e-06, "loss": 0.2524, "step": 203 },
    { "epoch": 0.41569026999490577, "grad_norm": 1.298222303390503, "learning_rate": 4.7886792452830195e-06, "loss": 0.2201, "step": 204 },
    { "epoch": 0.4177279673968416, "grad_norm": 1.2514641284942627, "learning_rate": 4.7849056603773594e-06, "loss": 0.2458, "step": 205 },
    { "epoch": 0.41976566479877736, "grad_norm": 1.1963963508605957, "learning_rate": 4.7811320754716985e-06, "loss": 0.2363, "step": 206 },
    { "epoch": 0.4218033622007132, "grad_norm": 1.2280910015106201, "learning_rate": 4.777358490566038e-06, "loss": 0.249, "step": 207 },
    { "epoch": 0.423841059602649, "grad_norm": 1.2325594425201416, "learning_rate": 4.7735849056603775e-06, "loss": 0.239, "step": 208 },
    { "epoch": 0.42587875700458483, "grad_norm": 1.268089771270752, "learning_rate": 4.769811320754717e-06, "loss": 0.2428, "step": 209 },
    { "epoch": 0.42791645440652065, "grad_norm": 1.147208571434021, "learning_rate": 4.766037735849057e-06, "loss": 0.2027, "step": 210 },
    { "epoch": 0.4299541518084564, "grad_norm": 1.293784260749817, "learning_rate": 4.762264150943396e-06, "loss": 0.2467, "step": 211 },
    { "epoch": 0.43199184921039224, "grad_norm": 1.3141661882400513, "learning_rate": 4.758490566037736e-06, "loss": 0.235, "step": 212 },
    { "epoch": 0.43402954661232807, "grad_norm": 1.3035210371017456, "learning_rate": 4.754716981132076e-06, "loss": 0.2461, "step": 213 },
    { "epoch": 0.4360672440142639, "grad_norm": 1.26072359085083, "learning_rate": 4.750943396226415e-06, "loss": 0.2483, "step": 214 },
    { "epoch": 0.4381049414161997, "grad_norm": 1.3666430711746216, "learning_rate": 4.747169811320755e-06, "loss": 0.2343, "step": 215 },
    { "epoch": 0.44014263881813553, "grad_norm": 1.1508736610412598, "learning_rate": 4.743396226415094e-06, "loss": 0.2456, "step": 216 },
    { "epoch": 0.4421803362200713, "grad_norm": 1.292580485343933, "learning_rate": 4.739622641509434e-06, "loss": 0.2244, "step": 217 },
    { "epoch": 0.4442180336220071, "grad_norm": 1.2299766540527344, "learning_rate": 4.735849056603774e-06, "loss": 0.2394, "step": 218 },
    { "epoch": 0.44625573102394295, "grad_norm": 1.316611886024475, "learning_rate": 4.732075471698114e-06, "loss": 0.2301, "step": 219 },
    { "epoch": 0.44829342842587877, "grad_norm": 1.3932688236236572, "learning_rate": 4.728301886792453e-06, "loss": 0.2184, "step": 220 },
    { "epoch": 0.4503311258278146, "grad_norm": 1.2194689512252808, "learning_rate": 4.724528301886793e-06, "loss": 0.2312, "step": 221 },
    { "epoch": 0.45236882322975036, "grad_norm": 1.1593202352523804, "learning_rate": 4.720754716981132e-06, "loss": 0.2122, "step": 222 },
    { "epoch": 0.4544065206316862, "grad_norm": 1.2845839262008667, "learning_rate": 4.716981132075472e-06, "loss": 0.2309, "step": 223 },
    { "epoch": 0.456444218033622, "grad_norm": 1.28933846950531, "learning_rate": 4.713207547169812e-06, "loss": 0.2461, "step": 224 },
    { "epoch": 0.45848191543555783, "grad_norm": 1.2130182981491089, "learning_rate": 4.709433962264151e-06, "loss": 0.2276, "step": 225 },
    { "epoch": 0.46051961283749365, "grad_norm": 1.1695858240127563, "learning_rate": 4.705660377358491e-06, "loss": 0.2386, "step": 226 },
    { "epoch": 0.4625573102394294, "grad_norm": 1.1642833948135376, "learning_rate": 4.701886792452831e-06, "loss": 0.2234, "step": 227 },
    { "epoch": 0.46459500764136524, "grad_norm": 1.2203588485717773, "learning_rate": 4.69811320754717e-06, "loss": 0.2245, "step": 228 },
    { "epoch": 0.46663270504330107, "grad_norm": 1.5108790397644043, "learning_rate": 4.69433962264151e-06, "loss": 0.2587, "step": 229 },
    { "epoch": 0.4686704024452369, "grad_norm": 2.0064799785614014, "learning_rate": 4.690566037735849e-06, "loss": 0.2279, "step": 230 },
    { "epoch": 0.4707080998471727, "grad_norm": 1.1252530813217163, "learning_rate": 4.686792452830189e-06, "loss": 0.23, "step": 231 },
    { "epoch": 0.47274579724910853, "grad_norm": 1.359333872795105, "learning_rate": 4.683018867924529e-06, "loss": 0.2444, "step": 232 },
    { "epoch": 0.4747834946510443, "grad_norm": 1.1184419393539429, "learning_rate": 4.6792452830188686e-06, "loss": 0.2339, "step": 233 },
    { "epoch": 0.4768211920529801, "grad_norm": 1.2560921907424927, "learning_rate": 4.6754716981132085e-06, "loss": 0.2549, "step": 234 },
    { "epoch": 0.47885888945491595, "grad_norm": 1.164919137954712, "learning_rate": 4.6716981132075476e-06, "loss": 0.238, "step": 235 },
    { "epoch": 0.48089658685685177, "grad_norm": 1.3727017641067505, "learning_rate": 4.667924528301887e-06, "loss": 0.2434, "step": 236 },
    { "epoch": 0.4829342842587876, "grad_norm": 1.4192495346069336, "learning_rate": 4.6641509433962265e-06, "loss": 0.2364, "step": 237 },
    { "epoch": 0.48497198166072336, "grad_norm": 1.3315473794937134, "learning_rate": 4.6603773584905665e-06, "loss": 0.2166, "step": 238 },
    { "epoch": 0.4870096790626592, "grad_norm": 1.140080213546753, "learning_rate": 4.656603773584906e-06, "loss": 0.2404, "step": 239 },
    { "epoch": 0.489047376464595, "grad_norm": 1.3821226358413696, "learning_rate": 4.6528301886792454e-06, "loss": 0.2376, "step": 240 },
    { "epoch": 0.49108507386653083, "grad_norm": 1.3087458610534668, "learning_rate": 4.649056603773585e-06, "loss": 0.2364, "step": 241 },
    { "epoch": 0.49312277126846665, "grad_norm": 1.1473658084869385, "learning_rate": 4.645283018867925e-06, "loss": 0.2356, "step": 242 },
    { "epoch": 0.4951604686704024, "grad_norm": 1.2189340591430664, "learning_rate": 4.641509433962264e-06, "loss": 0.2377, "step": 243 },
    { "epoch": 0.49719816607233824, "grad_norm": 1.6314069032669067, "learning_rate": 4.637735849056604e-06, "loss": 0.2496, "step": 244 },
    { "epoch": 0.49923586347427407, "grad_norm": 1.1271792650222778, "learning_rate": 4.633962264150943e-06, "loss": 0.2179, "step": 245 },
    { "epoch": 0.5012735608762099, "grad_norm": 1.1837356090545654, "learning_rate": 4.630188679245283e-06, "loss": 0.2334, "step": 246 },
    { "epoch": 0.5033112582781457, "grad_norm": 1.2714848518371582, "learning_rate": 4.626415094339623e-06, "loss": 0.263, "step": 247 },
    { "epoch": 0.5053489556800815, "grad_norm": 1.235137701034546, "learning_rate": 4.622641509433963e-06, "loss": 0.2551, "step": 248 },
    { "epoch": 0.5073866530820174, "grad_norm": 1.067122220993042, "learning_rate": 4.618867924528302e-06, "loss": 0.2397, "step": 249 },
    { "epoch": 0.5094243504839532, "grad_norm": 1.3224409818649292, "learning_rate": 4.615094339622642e-06, "loss": 0.2407, "step": 250 },
    { "epoch": 0.5114620478858889, "grad_norm": 1.1779237985610962, "learning_rate": 4.611320754716981e-06, "loss": 0.2297, "step": 251 },
    { "epoch": 0.5134997452878247, "grad_norm": 1.3744945526123047, "learning_rate": 4.607547169811321e-06, "loss": 0.241, "step": 252 },
    { "epoch": 0.5155374426897605, "grad_norm": 1.198855996131897, "learning_rate": 4.603773584905661e-06, "loss": 0.2352, "step": 253 },
    { "epoch": 0.5175751400916964, "grad_norm": 1.117774486541748, "learning_rate": 4.600000000000001e-06, "loss": 0.2418, "step": 254 },
    { "epoch": 0.5196128374936322, "grad_norm": 1.248888611793518, "learning_rate": 4.59622641509434e-06, "loss": 0.2244, "step": 255 },
    { "epoch": 0.521650534895568, "grad_norm": 1.2080906629562378, "learning_rate": 4.59245283018868e-06, "loss": 0.229, "step": 256 },
    { "epoch": 0.5236882322975038, "grad_norm": 1.1990790367126465, "learning_rate": 4.588679245283019e-06, "loss": 0.2253, "step": 257 },
    { "epoch": 0.5257259296994397, "grad_norm": 1.1284271478652954, "learning_rate": 4.584905660377359e-06, "loss": 0.2381, "step": 258 },
    { "epoch": 0.5277636271013755, "grad_norm": 1.2414554357528687, "learning_rate": 4.581132075471699e-06, "loss": 0.2332, "step": 259 },
    { "epoch": 0.5298013245033113, "grad_norm": 1.0976932048797607, "learning_rate": 4.577358490566038e-06, "loss": 0.2331, "step": 260 },
    { "epoch": 0.5318390219052471, "grad_norm": 1.2075899839401245, "learning_rate": 4.573584905660378e-06, "loss": 0.2413, "step": 261 },
    { "epoch": 0.5338767193071828, "grad_norm": 1.1429880857467651, "learning_rate": 4.569811320754718e-06, "loss": 0.23, "step": 262 },
    { "epoch": 0.5359144167091187, "grad_norm": 1.0777602195739746, "learning_rate": 4.566037735849057e-06, "loss": 0.2185, "step": 263 },
    { "epoch": 0.5379521141110545, "grad_norm": 1.143699288368225, "learning_rate": 4.562264150943397e-06, "loss": 0.2324, "step": 264 },
    { "epoch": 0.5399898115129903, "grad_norm": 1.3619898557662964, "learning_rate": 4.558490566037736e-06, "loss": 0.2484, "step": 265 },
    { "epoch": 0.5420275089149261, "grad_norm": 1.2638384103775024, "learning_rate": 4.554716981132076e-06, "loss": 0.2349, "step": 266 },
    { "epoch": 0.544065206316862, "grad_norm": 1.2247638702392578, "learning_rate": 4.5509433962264155e-06, "loss": 0.2372, "step": 267 },
    { "epoch": 0.5461029037187978, "grad_norm": 1.2362172603607178, "learning_rate": 4.547169811320755e-06, "loss": 0.2333, "step": 268 },
    { "epoch": 0.5481406011207336, "grad_norm": 1.2307566404342651, "learning_rate": 4.543396226415095e-06, "loss": 0.2411, "step": 269 },
    { "epoch": 0.5501782985226694, "grad_norm": 1.2503217458724976, "learning_rate": 4.539622641509434e-06, "loss": 0.2459, "step": 270 },
    { "epoch": 0.5522159959246052, "grad_norm": 1.2475491762161255, "learning_rate": 4.5358490566037735e-06, "loss": 0.2272, "step": 271 },
    { "epoch": 0.5542536933265411, "grad_norm": 1.105730414390564, "learning_rate": 4.532075471698113e-06, "loss": 0.2481, "step": 272 },
    { "epoch": 0.5562913907284768, "grad_norm": 1.275002121925354, "learning_rate": 4.528301886792453e-06, "loss": 0.2445, "step": 273 },
    { "epoch": 0.5583290881304126, "grad_norm": 1.1774675846099854, "learning_rate": 4.524528301886793e-06, "loss": 0.2377, "step": 274 },
    { "epoch": 0.5603667855323484, "grad_norm": 1.329745888710022, "learning_rate": 4.520754716981132e-06, "loss": 0.2319, "step": 275 },
    { "epoch": 0.5624044829342842, "grad_norm": 1.2236435413360596, "learning_rate": 4.516981132075472e-06, "loss": 0.2274, "step": 276 },
    { "epoch": 0.5644421803362201, "grad_norm": 1.0417534112930298, "learning_rate": 4.513207547169812e-06, "loss": 0.2234, "step": 277 },
    { "epoch": 0.5664798777381559, "grad_norm": 1.0934056043624878, "learning_rate": 4.509433962264151e-06, "loss": 0.2445, "step": 278 },
    { "epoch": 0.5685175751400917, "grad_norm": 1.2551244497299194, "learning_rate": 4.505660377358491e-06, "loss": 0.2434, "step": 279 },
    { "epoch": 0.5705552725420275, "grad_norm": 1.2088017463684082, "learning_rate": 4.50188679245283e-06, "loss": 0.2399, "step": 280 },
    { "epoch": 0.5725929699439634, "grad_norm": 1.2738829851150513, "learning_rate": 4.49811320754717e-06, "loss": 0.2328, "step": 281 },
    { "epoch": 0.5746306673458992, "grad_norm": 1.312220811843872, "learning_rate": 4.49433962264151e-06, "loss": 0.2334, "step": 282 },
    { "epoch": 0.5766683647478349, "grad_norm": 1.1316941976547241, "learning_rate": 4.49056603773585e-06, "loss": 0.225, "step": 283 },
    { "epoch": 0.5787060621497707, "grad_norm": 1.0500327348709106, "learning_rate": 4.486792452830189e-06, "loss": 0.226, "step": 284 },
    { "epoch": 0.5807437595517065, "grad_norm": 1.0962241888046265, "learning_rate": 4.483018867924528e-06, "loss": 0.2168, "step": 285 },
    { "epoch": 0.5827814569536424, "grad_norm": 1.100046992301941, "learning_rate": 4.479245283018868e-06, "loss": 0.2267, "step": 286 },
    { "epoch": 0.5848191543555782, "grad_norm": 1.2387049198150635, "learning_rate": 4.475471698113208e-06, "loss": 0.2509, "step": 287 },
    { "epoch": 0.586856851757514, "grad_norm": 1.1974563598632812, "learning_rate": 4.471698113207548e-06, "loss": 0.2351, "step": 288 },
    { "epoch": 0.5888945491594498, "grad_norm": 1.2102775573730469, "learning_rate": 4.467924528301888e-06, "loss": 0.2474, "step": 289 },
    { "epoch": 0.5909322465613857, "grad_norm": 1.0824848413467407, "learning_rate": 4.464150943396227e-06, "loss": 0.2289, "step": 290 },
    { "epoch": 0.5929699439633215, "grad_norm": 1.111902117729187, "learning_rate": 4.460377358490567e-06, "loss": 0.2363, "step": 291 },
    { "epoch": 0.5950076413652573, "grad_norm": 1.1692800521850586, "learning_rate": 4.456603773584906e-06, "loss": 0.2266, "step": 292 },
    { "epoch": 0.5970453387671931, "grad_norm": 1.160117506980896, "learning_rate": 4.452830188679246e-06, "loss": 0.2351, "step": 293 },
    { "epoch": 0.5990830361691288, "grad_norm": 1.1320550441741943, "learning_rate": 4.4490566037735856e-06, "loss": 0.2239, "step": 294 },
    { "epoch": 0.6011207335710647, "grad_norm": 1.1472080945968628, "learning_rate": 4.445283018867925e-06, "loss": 0.2157, "step": 295 },
    { "epoch": 0.6031584309730005, "grad_norm": 1.2992992401123047, "learning_rate": 4.4415094339622646e-06, "loss": 0.2283, "step": 296 },
    { "epoch": 0.6051961283749363, "grad_norm": 1.2557927370071411, "learning_rate": 4.4377358490566045e-06, "loss": 0.2339, "step": 297 },
    { "epoch": 0.6072338257768721, "grad_norm": 1.0591647624969482, "learning_rate": 4.4339622641509435e-06, "loss": 0.2152, "step": 298 },
    { "epoch": 0.609271523178808, "grad_norm": 1.0702134370803833, "learning_rate": 4.4301886792452834e-06, "loss": 0.206, "step": 299 },
    { "epoch": 0.6113092205807438, "grad_norm": 1.2004814147949219, "learning_rate": 4.4264150943396225e-06, "loss": 0.2307, "step": 300 },
    { "epoch": 0.6133469179826796, "grad_norm": 1.1907483339309692, "learning_rate": 4.4226415094339624e-06, "loss": 0.2289, "step": 301 },
    { "epoch": 0.6153846153846154, "grad_norm": 1.1154402494430542, "learning_rate": 4.418867924528302e-06, "loss": 0.2135, "step": 302 },
    { "epoch": 0.6174223127865512, "grad_norm": 1.1816260814666748, "learning_rate": 4.415094339622642e-06, "loss": 0.2286, "step": 303 },
    { "epoch": 0.6194600101884871, "grad_norm": 1.1875680685043335, "learning_rate": 4.411320754716981e-06, "loss": 0.2375, "step": 304 },
    { "epoch": 0.6214977075904228, "grad_norm": 1.0976321697235107, "learning_rate": 4.407547169811321e-06, "loss": 0.2137, "step": 305 },
    { "epoch": 0.6235354049923586, "grad_norm": 1.10517418384552, "learning_rate": 4.40377358490566e-06, "loss": 0.219, "step": 306 },
    { "epoch": 0.6255731023942944, "grad_norm": 1.1795883178710938, "learning_rate": 4.4e-06, "loss": 0.2286, "step": 307 },
    { "epoch": 0.6276107997962302, "grad_norm": 1.1178569793701172, "learning_rate": 4.39622641509434e-06, "loss": 0.22, "step": 308 },
    { "epoch": 0.6296484971981661, "grad_norm": 1.1791189908981323, "learning_rate": 4.39245283018868e-06, "loss": 0.2474, "step": 309 },
    { "epoch": 0.6316861946001019, "grad_norm": 1.1312475204467773, "learning_rate": 4.388679245283019e-06, "loss": 0.2474, "step": 310 },
    { "epoch": 0.6337238920020377, "grad_norm": 1.1903657913208008, "learning_rate": 4.384905660377359e-06, "loss": 0.2477, "step": 311 },
    { "epoch": 0.6357615894039735, "grad_norm": 1.1177330017089844, "learning_rate": 4.381132075471698e-06, "loss": 0.2412, "step": 312 },
    { "epoch": 0.6377992868059094, "grad_norm": 1.3050440549850464, "learning_rate": 4.377358490566038e-06, "loss": 0.2465, "step": 313 },
    { "epoch": 0.6398369842078452, "grad_norm": 1.1658434867858887, "learning_rate": 4.373584905660378e-06, "loss": 0.2162, "step": 314 },
    { "epoch": 0.6418746816097809, "grad_norm": 1.1645337343215942, "learning_rate": 4.369811320754717e-06, "loss": 0.2369, "step": 315 },
    { "epoch": 0.6439123790117167, "grad_norm": 1.1002851724624634, "learning_rate": 4.366037735849057e-06, "loss": 0.1995, "step": 316 },
    { "epoch": 0.6459500764136525, "grad_norm": 1.1481510400772095, "learning_rate": 4.362264150943397e-06, "loss": 0.2324, "step": 317 },
    { "epoch": 0.6479877738155884, "grad_norm": 1.2481803894042969, "learning_rate": 4.358490566037737e-06, "loss": 0.2327, "step": 318 },
    { "epoch": 0.6500254712175242, "grad_norm": 1.1513328552246094, "learning_rate": 4.354716981132076e-06, "loss": 0.2585, "step": 319 },
    { "epoch": 0.65206316861946, "grad_norm": 1.1497119665145874, "learning_rate": 4.350943396226415e-06, "loss": 0.233, "step": 320 },
    { "epoch": 0.6541008660213958, "grad_norm": 1.1609100103378296, "learning_rate": 4.347169811320755e-06, "loss": 0.2405, "step": 321 },
    { "epoch": 0.6561385634233317, "grad_norm": 1.2081102132797241, "learning_rate": 4.343396226415095e-06, "loss": 0.2378, "step": 322 },
    { "epoch": 0.6581762608252675, "grad_norm": 1.1453851461410522, "learning_rate": 4.339622641509435e-06, "loss": 0.2298, "step": 323 },
    { "epoch": 0.6602139582272033, "grad_norm": 1.051963210105896, "learning_rate": 4.3358490566037745e-06, "loss": 0.2424, "step": 324 },
    { "epoch": 0.6622516556291391, "grad_norm": 1.122475028038025, "learning_rate": 4.332075471698114e-06, "loss": 0.2217, "step": 325 },
    { "epoch": 0.6642893530310748, "grad_norm": 1.557624101638794, "learning_rate": 4.3283018867924535e-06, "loss": 0.2476, "step": 326 },
    { "epoch": 0.6663270504330107, "grad_norm": 1.133476972579956, "learning_rate": 4.324528301886793e-06, "loss": 0.24, "step": 327 },
    { "epoch": 0.6683647478349465, "grad_norm": 1.192134976387024, "learning_rate": 4.3207547169811325e-06, "loss": 0.2285, "step": 328 },
    { "epoch": 0.6704024452368823, "grad_norm": 1.0376332998275757, "learning_rate": 4.316981132075472e-06, "loss": 0.2314, "step": 329 },
    { "epoch": 0.6724401426388181, "grad_norm": 1.1142336130142212, "learning_rate": 4.3132075471698115e-06, "loss": 0.2213, "step": 330 },
    { "epoch": 0.674477840040754, "grad_norm": 1.075834035873413, "learning_rate": 4.309433962264151e-06, "loss": 0.2541, "step": 331 },
    { "epoch": 0.6765155374426898, "grad_norm": 1.6311166286468506, "learning_rate": 4.305660377358491e-06, "loss": 0.2273, "step": 332 },
    { "epoch": 0.6785532348446256, "grad_norm": 1.2183853387832642, "learning_rate": 4.30188679245283e-06, "loss": 0.2235, "step": 333 },
    { "epoch": 0.6805909322465614, "grad_norm": 1.115402340888977, "learning_rate": 4.29811320754717e-06, "loss": 0.2241, "step": 334 },
    { "epoch": 0.6826286296484972, "grad_norm": 1.1034786701202393, "learning_rate": 4.294339622641509e-06, "loss": 0.2397, "step": 335 },
    { "epoch": 0.6846663270504331, "grad_norm": 1.1569246053695679, "learning_rate": 4.290566037735849e-06, "loss": 0.231, "step": 336 },
    { "epoch": 0.6867040244523688, "grad_norm": 1.0261273384094238, "learning_rate": 4.286792452830189e-06, "loss": 0.2381, "step": 337 },
    { "epoch": 0.6887417218543046, "grad_norm": 1.1715890169143677, "learning_rate": 4.283018867924529e-06, "loss": 0.2271, "step": 338 },
    { "epoch": 0.6907794192562404, "grad_norm": 1.1164259910583496, "learning_rate": 4.279245283018868e-06, "loss": 0.2145, "step": 339 },
    { "epoch": 0.6928171166581762, "grad_norm": 1.1052844524383545, "learning_rate": 4.275471698113208e-06, "loss": 0.2303, "step": 340 },
    { "epoch": 0.6948548140601121, "grad_norm": 1.193002700805664, "learning_rate": 4.271698113207547e-06, "loss": 0.2329, "step": 341 },
    { "epoch": 0.6968925114620479, "grad_norm": 1.141808032989502, "learning_rate": 4.267924528301887e-06, "loss": 0.2293, "step": 342 },
    { "epoch": 0.6989302088639837, "grad_norm": 1.0740857124328613, "learning_rate": 4.264150943396227e-06, "loss": 0.2336, "step": 343 },
    { "epoch": 0.7009679062659195, "grad_norm": 1.1825228929519653, "learning_rate": 4.260377358490567e-06, "loss": 0.2367, "step": 344 },
    { "epoch": 0.7030056036678554, "grad_norm": 1.0624991655349731, "learning_rate": 4.256603773584906e-06, "loss": 0.2246, "step": 345 },
    { "epoch": 0.7050433010697912, "grad_norm": 1.292654037475586, "learning_rate": 4.252830188679246e-06, "loss": 0.2479, "step": 346 },
    { "epoch": 0.7070809984717269, "grad_norm": 1.0635449886322021, "learning_rate": 4.249056603773585e-06, "loss": 0.2285, "step": 347 },
    { "epoch": 0.7091186958736627, "grad_norm": 1.0410432815551758, "learning_rate": 4.245283018867925e-06, "loss": 0.203, "step": 348 },
    { "epoch": 0.7111563932755985, "grad_norm": 1.154789924621582, "learning_rate": 4.241509433962264e-06, "loss": 0.2424, "step": 349 },
    { "epoch": 0.7131940906775344, "grad_norm": 1.1573512554168701, "learning_rate": 4.237735849056604e-06, "loss": 0.2163, "step": 350 },
    { "epoch": 0.7152317880794702, "grad_norm": 1.0690231323242188, "learning_rate": 4.233962264150944e-06, "loss": 0.2204, "step": 351 },
    { "epoch": 0.717269485481406, "grad_norm": 1.1083498001098633, "learning_rate": 4.230188679245284e-06, "loss": 0.2258, "step": 352 },
    { "epoch": 0.7193071828833418, "grad_norm": 1.260735273361206, "learning_rate": 4.226415094339623e-06, "loss": 0.236, "step": 353 },
    { "epoch": 0.7213448802852777, "grad_norm": 1.0777976512908936, "learning_rate": 4.222641509433963e-06, "loss": 0.217, "step": 354 },
    { "epoch": 0.7233825776872135, "grad_norm": 1.0879008769989014, "learning_rate": 4.218867924528302e-06, "loss": 0.2163, "step": 355 },
    { "epoch": 0.7254202750891493, "grad_norm": 1.1055690050125122, "learning_rate": 4.215094339622642e-06, "loss": 0.2244, "step": 356 },
    { "epoch": 0.7274579724910851, "grad_norm": 1.1160818338394165, "learning_rate": 4.2113207547169815e-06, "loss": 0.2081, "step": 357 },
    { "epoch": 0.7294956698930208, "grad_norm": 1.238552212715149, "learning_rate": 4.2075471698113215e-06, "loss": 0.2242, "step": 358 },
    { "epoch": 0.7315333672949567, "grad_norm": 1.0889108180999756, "learning_rate": 4.2037735849056605e-06, "loss": 0.2276, "step": 359 },
    { "epoch": 0.7335710646968925, "grad_norm": 1.314106822013855, "learning_rate": 4.2000000000000004e-06, "loss": 0.2423, "step": 360 },
    { "epoch": 0.7356087620988283, "grad_norm": 1.304366111755371, "learning_rate": 4.1962264150943395e-06, "loss": 0.2556, "step": 361 },
    { "epoch": 0.7376464595007641, "grad_norm": 1.227425217628479, "learning_rate": 4.1924528301886794e-06, "loss": 0.2275, "step": 362 },
    { "epoch": 0.7396841569027, "grad_norm": 1.1975058317184448, "learning_rate": 4.188679245283019e-06, "loss": 0.2376, "step": 363 },
    { "epoch": 0.7417218543046358, "grad_norm": 1.1609851121902466, "learning_rate": 4.184905660377358e-06, "loss": 0.2296, "step": 364 },
    { "epoch": 0.7437595517065716, "grad_norm": 1.1305787563323975, "learning_rate": 4.181132075471698e-06, "loss": 0.231, "step": 365 },
    { "epoch": 0.7457972491085074, "grad_norm": 1.245123267173767, "learning_rate": 4.177358490566038e-06, "loss": 0.2438, "step": 366 },
    { "epoch": 0.7478349465104432, "grad_norm": 1.2077217102050781, "learning_rate": 4.173584905660378e-06, "loss": 0.2331, "step": 367 },
    { "epoch": 0.7498726439123791, "grad_norm": 1.2838149070739746, "learning_rate": 4.169811320754717e-06, "loss": 0.2205, "step": 368 },
    { "epoch": 0.7519103413143148, "grad_norm": 1.2761950492858887, "learning_rate": 4.166037735849056e-06, "loss": 0.2339, "step": 369 },
    { "epoch": 0.7539480387162506, "grad_norm": 1.2258546352386475, "learning_rate": 4.162264150943396
|
"loss": 0.2494, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7559857361181864, |
|
"grad_norm": 1.0878491401672363, |
|
"learning_rate": 4.158490566037736e-06, |
|
"loss": 0.2337, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.7580234335201222, |
|
"grad_norm": 1.4389631748199463, |
|
"learning_rate": 4.154716981132076e-06, |
|
"loss": 0.2409, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.7600611309220581, |
|
"grad_norm": 1.0960638523101807, |
|
"learning_rate": 4.150943396226416e-06, |
|
"loss": 0.2239, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.7620988283239939, |
|
"grad_norm": 1.293862223625183, |
|
"learning_rate": 4.147169811320755e-06, |
|
"loss": 0.2438, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.7641365257259297, |
|
"grad_norm": 1.177188754081726, |
|
"learning_rate": 4.143396226415095e-06, |
|
"loss": 0.2241, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7661742231278655, |
|
"grad_norm": 1.2292778491973877, |
|
"learning_rate": 4.139622641509434e-06, |
|
"loss": 0.2386, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.7682119205298014, |
|
"grad_norm": 1.1312750577926636, |
|
"learning_rate": 4.135849056603774e-06, |
|
"loss": 0.2331, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.7702496179317372, |
|
"grad_norm": 1.0975465774536133, |
|
"learning_rate": 4.132075471698114e-06, |
|
"loss": 0.2213, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.7722873153336729, |
|
"grad_norm": 1.2238826751708984, |
|
"learning_rate": 4.128301886792453e-06, |
|
"loss": 0.2338, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.7743250127356087, |
|
"grad_norm": 1.3611332178115845, |
|
"learning_rate": 4.124528301886793e-06, |
|
"loss": 0.2454, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7763627101375445, |
|
"grad_norm": 1.3693833351135254, |
|
"learning_rate": 4.120754716981133e-06, |
|
"loss": 0.2434, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.7784004075394804, |
|
"grad_norm": 1.2046077251434326, |
|
"learning_rate": 4.116981132075472e-06, |
|
"loss": 0.224, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.7804381049414162, |
|
"grad_norm": 1.2061010599136353, |
|
"learning_rate": 4.113207547169812e-06, |
|
"loss": 0.2264, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.782475802343352, |
|
"grad_norm": 1.0464826822280884, |
|
"learning_rate": 4.109433962264151e-06, |
|
"loss": 0.2111, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.7845134997452878, |
|
"grad_norm": 0.9789960980415344, |
|
"learning_rate": 4.105660377358491e-06, |
|
"loss": 0.2082, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7865511971472237, |
|
"grad_norm": 1.1676138639450073, |
|
"learning_rate": 4.101886792452831e-06, |
|
"loss": 0.2283, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.7885888945491595, |
|
"grad_norm": 1.179202914237976, |
|
"learning_rate": 4.0981132075471705e-06, |
|
"loss": 0.2329, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.7906265919510953, |
|
"grad_norm": 1.2767287492752075, |
|
"learning_rate": 4.09433962264151e-06, |
|
"loss": 0.2378, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.7926642893530311, |
|
"grad_norm": 1.1678310632705688, |
|
"learning_rate": 4.0905660377358495e-06, |
|
"loss": 0.2232, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.7947019867549668, |
|
"grad_norm": 1.2610273361206055, |
|
"learning_rate": 4.0867924528301886e-06, |
|
"loss": 0.2384, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7967396841569027, |
|
"grad_norm": 1.3496994972229004, |
|
"learning_rate": 4.0830188679245285e-06, |
|
"loss": 0.2346, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.7987773815588385, |
|
"grad_norm": 1.163509488105774, |
|
"learning_rate": 4.079245283018868e-06, |
|
"loss": 0.2234, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.8008150789607743, |
|
"grad_norm": 1.1540744304656982, |
|
"learning_rate": 4.075471698113208e-06, |
|
"loss": 0.2164, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.8028527763627101, |
|
"grad_norm": 1.158379316329956, |
|
"learning_rate": 4.071698113207547e-06, |
|
"loss": 0.2323, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.804890473764646, |
|
"grad_norm": 1.1848655939102173, |
|
"learning_rate": 4.067924528301887e-06, |
|
"loss": 0.2448, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8069281711665818, |
|
"grad_norm": 1.239961862564087, |
|
"learning_rate": 4.064150943396226e-06, |
|
"loss": 0.2343, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.8089658685685176, |
|
"grad_norm": 1.0600473880767822, |
|
"learning_rate": 4.060377358490566e-06, |
|
"loss": 0.2333, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.8110035659704534, |
|
"grad_norm": 1.2741254568099976, |
|
"learning_rate": 4.056603773584906e-06, |
|
"loss": 0.237, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.8130412633723892, |
|
"grad_norm": 1.182904839515686, |
|
"learning_rate": 4.052830188679245e-06, |
|
"loss": 0.2217, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.8150789607743251, |
|
"grad_norm": 1.1751116514205933, |
|
"learning_rate": 4.049056603773585e-06, |
|
"loss": 0.2354, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8171166581762608, |
|
"grad_norm": 1.134203553199768, |
|
"learning_rate": 4.045283018867925e-06, |
|
"loss": 0.2297, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.8191543555781966, |
|
"grad_norm": 1.1873515844345093, |
|
"learning_rate": 4.041509433962265e-06, |
|
"loss": 0.2476, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.8211920529801324, |
|
"grad_norm": 1.1874173879623413, |
|
"learning_rate": 4.037735849056604e-06, |
|
"loss": 0.2232, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.8232297503820682, |
|
"grad_norm": 1.119139552116394, |
|
"learning_rate": 4.033962264150943e-06, |
|
"loss": 0.2348, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.8252674477840041, |
|
"grad_norm": 1.1560324430465698, |
|
"learning_rate": 4.030188679245283e-06, |
|
"loss": 0.2337, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8273051451859399, |
|
"grad_norm": 1.1288225650787354, |
|
"learning_rate": 4.026415094339623e-06, |
|
"loss": 0.2319, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.8293428425878757, |
|
"grad_norm": 1.2800090312957764, |
|
"learning_rate": 4.022641509433963e-06, |
|
"loss": 0.2237, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.8313805399898115, |
|
"grad_norm": 1.2394243478775024, |
|
"learning_rate": 4.018867924528303e-06, |
|
"loss": 0.2358, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.8334182373917474, |
|
"grad_norm": 1.231703758239746, |
|
"learning_rate": 4.015094339622642e-06, |
|
"loss": 0.2275, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.8354559347936832, |
|
"grad_norm": 1.0887949466705322, |
|
"learning_rate": 4.011320754716982e-06, |
|
"loss": 0.2335, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8374936321956189, |
|
"grad_norm": 1.2228186130523682, |
|
"learning_rate": 4.007547169811321e-06, |
|
"loss": 0.2307, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.8395313295975547, |
|
"grad_norm": 1.0364912748336792, |
|
"learning_rate": 4.003773584905661e-06, |
|
"loss": 0.2117, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.8415690269994905, |
|
"grad_norm": 1.0746346712112427, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 0.2181, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.8436067244014264, |
|
"grad_norm": 1.0695878267288208, |
|
"learning_rate": 3.99622641509434e-06, |
|
"loss": 0.2497, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.8456444218033622, |
|
"grad_norm": 1.2379292249679565, |
|
"learning_rate": 3.99245283018868e-06, |
|
"loss": 0.2236, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.847682119205298, |
|
"grad_norm": 1.0842210054397583, |
|
"learning_rate": 3.9886792452830196e-06, |
|
"loss": 0.2268, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.8497198166072338, |
|
"grad_norm": 1.2367124557495117, |
|
"learning_rate": 3.984905660377359e-06, |
|
"loss": 0.2366, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.8517575140091697, |
|
"grad_norm": 1.2747502326965332, |
|
"learning_rate": 3.9811320754716985e-06, |
|
"loss": 0.2371, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.8537952114111055, |
|
"grad_norm": 1.1272820234298706, |
|
"learning_rate": 3.977358490566038e-06, |
|
"loss": 0.2369, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.8558329088130413, |
|
"grad_norm": 1.0960078239440918, |
|
"learning_rate": 3.9735849056603775e-06, |
|
"loss": 0.2383, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8578706062149771, |
|
"grad_norm": 1.1670606136322021, |
|
"learning_rate": 3.9698113207547174e-06, |
|
"loss": 0.2511, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.8599083036169128, |
|
"grad_norm": 1.0942180156707764, |
|
"learning_rate": 3.966037735849057e-06, |
|
"loss": 0.2319, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.8619460010188487, |
|
"grad_norm": 1.1233775615692139, |
|
"learning_rate": 3.962264150943396e-06, |
|
"loss": 0.2144, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.8639836984207845, |
|
"grad_norm": 1.2059624195098877, |
|
"learning_rate": 3.958490566037736e-06, |
|
"loss": 0.2212, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.8660213958227203, |
|
"grad_norm": 1.1963043212890625, |
|
"learning_rate": 3.954716981132075e-06, |
|
"loss": 0.2378, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.8680590932246561, |
|
"grad_norm": 1.2415270805358887, |
|
"learning_rate": 3.950943396226415e-06, |
|
"loss": 0.2276, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.870096790626592, |
|
"grad_norm": 1.3280036449432373, |
|
"learning_rate": 3.947169811320755e-06, |
|
"loss": 0.2395, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.8721344880285278, |
|
"grad_norm": 1.2570695877075195, |
|
"learning_rate": 3.943396226415095e-06, |
|
"loss": 0.2474, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.8741721854304636, |
|
"grad_norm": 1.1252264976501465, |
|
"learning_rate": 3.939622641509434e-06, |
|
"loss": 0.2265, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.8762098828323994, |
|
"grad_norm": 1.0487228631973267, |
|
"learning_rate": 3.935849056603774e-06, |
|
"loss": 0.2224, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8782475802343352, |
|
"grad_norm": 1.0646063089370728, |
|
"learning_rate": 3.932075471698113e-06, |
|
"loss": 0.2232, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.8802852776362711, |
|
"grad_norm": 1.1609469652175903, |
|
"learning_rate": 3.928301886792453e-06, |
|
"loss": 0.2347, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.8823229750382068, |
|
"grad_norm": 1.0545512437820435, |
|
"learning_rate": 3.924528301886793e-06, |
|
"loss": 0.2251, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.8843606724401426, |
|
"grad_norm": 1.1264142990112305, |
|
"learning_rate": 3.920754716981132e-06, |
|
"loss": 0.2459, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.8863983698420784, |
|
"grad_norm": 1.1396156549453735, |
|
"learning_rate": 3.916981132075472e-06, |
|
"loss": 0.2385, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.8884360672440142, |
|
"grad_norm": 1.17756187915802, |
|
"learning_rate": 3.913207547169812e-06, |
|
"loss": 0.2306, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.8904737646459501, |
|
"grad_norm": 1.0548409223556519, |
|
"learning_rate": 3.909433962264151e-06, |
|
"loss": 0.2192, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.8925114620478859, |
|
"grad_norm": 1.161879062652588, |
|
"learning_rate": 3.905660377358491e-06, |
|
"loss": 0.2264, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.8945491594498217, |
|
"grad_norm": 1.1480745077133179, |
|
"learning_rate": 3.90188679245283e-06, |
|
"loss": 0.2389, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.8965868568517575, |
|
"grad_norm": 1.0667020082473755, |
|
"learning_rate": 3.89811320754717e-06, |
|
"loss": 0.2312, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8986245542536934, |
|
"grad_norm": 1.2451261281967163, |
|
"learning_rate": 3.89433962264151e-06, |
|
"loss": 0.241, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.9006622516556292, |
|
"grad_norm": 1.2452954053878784, |
|
"learning_rate": 3.89056603773585e-06, |
|
"loss": 0.2444, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.9026999490575649, |
|
"grad_norm": 1.134698510169983, |
|
"learning_rate": 3.88679245283019e-06, |
|
"loss": 0.2132, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.9047376464595007, |
|
"grad_norm": 1.269184947013855, |
|
"learning_rate": 3.883018867924529e-06, |
|
"loss": 0.2445, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.9067753438614365, |
|
"grad_norm": 1.2156351804733276, |
|
"learning_rate": 3.879245283018868e-06, |
|
"loss": 0.2469, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9088130412633724, |
|
"grad_norm": 1.1011265516281128, |
|
"learning_rate": 3.875471698113208e-06, |
|
"loss": 0.2307, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.9108507386653082, |
|
"grad_norm": 1.08492910861969, |
|
"learning_rate": 3.871698113207548e-06, |
|
"loss": 0.2228, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.912888436067244, |
|
"grad_norm": 1.1414035558700562, |
|
"learning_rate": 3.8679245283018875e-06, |
|
"loss": 0.2191, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.9149261334691798, |
|
"grad_norm": 1.0980679988861084, |
|
"learning_rate": 3.8641509433962266e-06, |
|
"loss": 0.2323, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.9169638308711157, |
|
"grad_norm": 1.1721632480621338, |
|
"learning_rate": 3.8603773584905665e-06, |
|
"loss": 0.2457, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9190015282730515, |
|
"grad_norm": 1.1284496784210205, |
|
"learning_rate": 3.856603773584906e-06, |
|
"loss": 0.2326, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.9210392256749873, |
|
"grad_norm": 1.0117298364639282, |
|
"learning_rate": 3.8528301886792455e-06, |
|
"loss": 0.2389, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"grad_norm": 1.173325777053833, |
|
"learning_rate": 3.849056603773585e-06, |
|
"loss": 0.2304, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.9251146204788588, |
|
"grad_norm": 1.0675781965255737, |
|
"learning_rate": 3.8452830188679245e-06, |
|
"loss": 0.2178, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.9271523178807947, |
|
"grad_norm": 1.0862107276916504, |
|
"learning_rate": 3.841509433962264e-06, |
|
"loss": 0.2293, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9291900152827305, |
|
"grad_norm": 1.119224190711975, |
|
"learning_rate": 3.837735849056604e-06, |
|
"loss": 0.2228, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.9312277126846663, |
|
"grad_norm": 1.0795427560806274, |
|
"learning_rate": 3.833962264150944e-06, |
|
"loss": 0.2235, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.9332654100866021, |
|
"grad_norm": 1.1415457725524902, |
|
"learning_rate": 3.830188679245283e-06, |
|
"loss": 0.2272, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.935303107488538, |
|
"grad_norm": 1.1307644844055176, |
|
"learning_rate": 3.826415094339623e-06, |
|
"loss": 0.2186, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.9373408048904738, |
|
"grad_norm": 1.1211094856262207, |
|
"learning_rate": 3.822641509433962e-06, |
|
"loss": 0.2233, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9393785022924096, |
|
"grad_norm": 1.1230515241622925, |
|
"learning_rate": 3.818867924528302e-06, |
|
"loss": 0.2318, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.9414161996943454, |
|
"grad_norm": 1.2053518295288086, |
|
"learning_rate": 3.815094339622642e-06, |
|
"loss": 0.2225, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.9434538970962812, |
|
"grad_norm": 1.1487395763397217, |
|
"learning_rate": 3.8113207547169816e-06, |
|
"loss": 0.23, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.9454915944982171, |
|
"grad_norm": 1.03309166431427, |
|
"learning_rate": 3.807547169811321e-06, |
|
"loss": 0.2163, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.9475292919001528, |
|
"grad_norm": 1.2096184492111206, |
|
"learning_rate": 3.8037735849056605e-06, |
|
"loss": 0.2312, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9495669893020886, |
|
"grad_norm": 1.5864837169647217, |
|
"learning_rate": 3.8000000000000005e-06, |
|
"loss": 0.227, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.9516046867040244, |
|
"grad_norm": 1.1054576635360718, |
|
"learning_rate": 3.79622641509434e-06, |
|
"loss": 0.2303, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.9536423841059603, |
|
"grad_norm": 1.0742146968841553, |
|
"learning_rate": 3.79245283018868e-06, |
|
"loss": 0.2282, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.9556800815078961, |
|
"grad_norm": 1.048632025718689, |
|
"learning_rate": 3.788679245283019e-06, |
|
"loss": 0.222, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.9577177789098319, |
|
"grad_norm": 1.1467828750610352, |
|
"learning_rate": 3.784905660377359e-06, |
|
"loss": 0.2169, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9597554763117677, |
|
"grad_norm": 1.1006637811660767, |
|
"learning_rate": 3.7811320754716983e-06, |
|
"loss": 0.227, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.9617931737137035, |
|
"grad_norm": 1.4877111911773682, |
|
"learning_rate": 3.7773584905660383e-06, |
|
"loss": 0.2207, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.9638308711156394, |
|
"grad_norm": 1.174248456954956, |
|
"learning_rate": 3.7735849056603777e-06, |
|
"loss": 0.2257, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.9658685685175752, |
|
"grad_norm": 1.0940933227539062, |
|
"learning_rate": 3.7698113207547172e-06, |
|
"loss": 0.2265, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.9679062659195109, |
|
"grad_norm": 1.0824356079101562, |
|
"learning_rate": 3.7660377358490567e-06, |
|
"loss": 0.2261, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.9699439633214467, |
|
"grad_norm": 1.0655136108398438, |
|
"learning_rate": 3.7622641509433966e-06, |
|
"loss": 0.2148, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.9719816607233825, |
|
"grad_norm": 1.3165481090545654, |
|
"learning_rate": 3.758490566037736e-06, |
|
"loss": 0.2337, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.9740193581253184, |
|
"grad_norm": 1.0988367795944214, |
|
"learning_rate": 3.754716981132076e-06, |
|
"loss": 0.1979, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.9760570555272542, |
|
"grad_norm": 1.0447558164596558, |
|
"learning_rate": 3.750943396226415e-06, |
|
"loss": 0.2325, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.97809475292919, |
|
"grad_norm": 1.1018916368484497, |
|
"learning_rate": 3.747169811320755e-06, |
|
"loss": 0.2161, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9801324503311258, |
|
"grad_norm": 1.2155579328536987, |
|
"learning_rate": 3.7433962264150945e-06, |
|
"loss": 0.2191, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.9821701477330617, |
|
"grad_norm": 0.9788108468055725, |
|
"learning_rate": 3.7396226415094344e-06, |
|
"loss": 0.2282, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.9842078451349975, |
|
"grad_norm": 1.0340372323989868, |
|
"learning_rate": 3.7358490566037735e-06, |
|
"loss": 0.2276, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.9862455425369333, |
|
"grad_norm": 0.9971087574958801, |
|
"learning_rate": 3.7320754716981134e-06, |
|
"loss": 0.2176, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.9882832399388691, |
|
"grad_norm": 1.0751736164093018, |
|
"learning_rate": 3.728301886792453e-06, |
|
"loss": 0.2143, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9903209373408048, |
|
"grad_norm": 1.188984751701355, |
|
"learning_rate": 3.724528301886793e-06, |
|
"loss": 0.2375, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.9923586347427407, |
|
"grad_norm": 1.320594072341919, |
|
"learning_rate": 3.7207547169811327e-06, |
|
"loss": 0.223, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.9943963321446765, |
|
"grad_norm": 1.1396737098693848, |
|
"learning_rate": 3.716981132075472e-06, |
|
"loss": 0.2413, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.9964340295466123, |
|
"grad_norm": 1.0497945547103882, |
|
"learning_rate": 3.7132075471698113e-06, |
|
"loss": 0.2177, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.9984717269485481, |
|
"grad_norm": 1.2380748987197876, |
|
"learning_rate": 3.709433962264151e-06, |
|
"loss": 0.2351, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.000509424350484, |
|
"grad_norm": 0.9542668461799622, |
|
"learning_rate": 3.705660377358491e-06, |
|
"loss": 0.2136, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.0025471217524198, |
|
"grad_norm": 0.9574536681175232, |
|
"learning_rate": 3.7018867924528306e-06, |
|
"loss": 0.1899, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.0045848191543556, |
|
"grad_norm": 1.0352755784988403, |
|
"learning_rate": 3.6981132075471697e-06, |
|
"loss": 0.1816, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.0066225165562914, |
|
"grad_norm": 1.0826165676116943, |
|
"learning_rate": 3.6943396226415096e-06, |
|
"loss": 0.1858, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.0086602139582272, |
|
"grad_norm": 1.2422000169754028, |
|
"learning_rate": 3.6905660377358495e-06, |
|
"loss": 0.1878, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.010697911360163, |
|
"grad_norm": 1.0961295366287231, |
|
"learning_rate": 3.686792452830189e-06, |
|
"loss": 0.1721, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.012735608762099, |
|
"grad_norm": 1.2105534076690674, |
|
"learning_rate": 3.683018867924529e-06, |
|
"loss": 0.1682, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.0147733061640347, |
|
"grad_norm": 1.0163434743881226, |
|
"learning_rate": 3.679245283018868e-06, |
|
"loss": 0.1745, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.0168110035659705, |
|
"grad_norm": 1.1357200145721436, |
|
"learning_rate": 3.675471698113208e-06, |
|
"loss": 0.174, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.0188487009679064, |
|
"grad_norm": 1.129521369934082, |
|
"learning_rate": 3.6716981132075474e-06, |
|
"loss": 0.1737, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.0208863983698422, |
|
"grad_norm": 1.1067070960998535, |
|
"learning_rate": 3.6679245283018873e-06, |
|
"loss": 0.1715, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.0229240957717778, |
|
"grad_norm": 1.3292362689971924, |
|
"learning_rate": 3.664150943396227e-06, |
|
"loss": 0.1719, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.0249617931737136, |
|
"grad_norm": 1.184263825416565, |
|
"learning_rate": 3.6603773584905663e-06, |
|
"loss": 0.1772, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.0269994905756494, |
|
"grad_norm": 1.2224076986312866, |
|
"learning_rate": 3.6566037735849058e-06, |
|
"loss": 0.1799, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.0290371879775853, |
|
"grad_norm": 1.2455564737319946, |
|
"learning_rate": 3.6528301886792457e-06, |
|
"loss": 0.1875, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.031074885379521, |
|
"grad_norm": 1.037973165512085, |
|
"learning_rate": 3.649056603773585e-06, |
|
"loss": 0.1751, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.033112582781457, |
|
"grad_norm": 1.3113584518432617, |
|
"learning_rate": 3.645283018867925e-06, |
|
"loss": 0.1805, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.0351502801833927, |
|
"grad_norm": 1.113845705986023, |
|
"learning_rate": 3.641509433962264e-06, |
|
"loss": 0.163, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.0371879775853285, |
|
"grad_norm": 1.1282869577407837, |
|
"learning_rate": 3.637735849056604e-06, |
|
"loss": 0.1774, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.0392256749872644, |
|
"grad_norm": 0.9915235042572021, |
|
"learning_rate": 3.6339622641509436e-06, |
|
"loss": 0.1676, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.0412633723892002, |
|
"grad_norm": 1.1076091527938843, |
|
"learning_rate": 3.6301886792452835e-06, |
|
"loss": 0.1811, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.043301069791136, |
|
"grad_norm": 1.4706580638885498, |
|
"learning_rate": 3.626415094339623e-06, |
|
"loss": 0.1749, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.0453387671930718, |
|
"grad_norm": 1.0995841026306152, |
|
"learning_rate": 3.6226415094339625e-06, |
|
"loss": 0.1682, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.0473764645950077, |
|
"grad_norm": 1.3873177766799927, |
|
"learning_rate": 3.618867924528302e-06, |
|
"loss": 0.1812, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.0494141619969435, |
|
"grad_norm": 1.1935499906539917, |
|
"learning_rate": 3.615094339622642e-06, |
|
"loss": 0.1876, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.0514518593988793, |
|
"grad_norm": 1.2057229280471802, |
|
"learning_rate": 3.6113207547169814e-06, |
|
"loss": 0.1815, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.0534895568008151, |
|
"grad_norm": 1.1333197355270386, |
|
"learning_rate": 3.6075471698113213e-06, |
|
"loss": 0.1828, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.055527254202751, |
|
"grad_norm": 1.0647273063659668, |
|
"learning_rate": 3.6037735849056603e-06, |
|
"loss": 0.181, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.0575649516046868, |
|
"grad_norm": 1.204564094543457, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 0.1825, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.0596026490066226, |
|
"grad_norm": 1.0661295652389526, |
|
"learning_rate": 3.5962264150943398e-06, |
|
"loss": 0.1864, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.0616403464085584, |
|
"grad_norm": 1.0870025157928467, |
|
"learning_rate": 3.5924528301886797e-06, |
|
"loss": 0.1789, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.0636780438104942, |
|
"grad_norm": 1.0620194673538208, |
|
"learning_rate": 3.588679245283019e-06, |
|
"loss": 0.1863, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.06571574121243, |
|
"grad_norm": 1.1938071250915527, |
|
"learning_rate": 3.5849056603773586e-06, |
|
"loss": 0.1863, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.0677534386143657, |
|
"grad_norm": 1.2299485206604004, |
|
"learning_rate": 3.581132075471698e-06, |
|
"loss": 0.1881, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.0697911360163015, |
|
"grad_norm": 1.043164610862732, |
|
"learning_rate": 3.577358490566038e-06, |
|
"loss": 0.1631, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.0718288334182373, |
|
"grad_norm": 1.200393795967102, |
|
"learning_rate": 3.5735849056603775e-06, |
|
"loss": 0.1871, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.0738665308201731, |
|
"grad_norm": 1.1729276180267334, |
|
"learning_rate": 3.5698113207547175e-06, |
|
"loss": 0.1776, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.075904228222109, |
|
"grad_norm": 1.3533014059066772, |
|
"learning_rate": 3.5660377358490565e-06, |
|
"loss": 0.1924, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.0779419256240448, |
|
"grad_norm": 1.1192210912704468, |
|
"learning_rate": 3.5622641509433964e-06, |
|
"loss": 0.1826, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.0799796230259806, |
|
"grad_norm": 1.2234528064727783, |
|
"learning_rate": 3.558490566037736e-06, |
|
"loss": 0.1803, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.0820173204279164, |
|
"grad_norm": 1.1349793672561646, |
|
"learning_rate": 3.554716981132076e-06, |
|
"loss": 0.1862, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.0840550178298523, |
|
"grad_norm": 1.1058518886566162, |
|
"learning_rate": 3.5509433962264158e-06, |
|
"loss": 0.1722, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.086092715231788, |
|
"grad_norm": 1.0707038640975952, |
|
"learning_rate": 3.547169811320755e-06, |
|
"loss": 0.1709, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.088130412633724, |
|
"grad_norm": 1.2310295104980469, |
|
"learning_rate": 3.5433962264150943e-06, |
|
"loss": 0.187, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.0901681100356597, |
|
"grad_norm": 1.098715901374817, |
|
"learning_rate": 3.5396226415094342e-06, |
|
"loss": 0.1695, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.0922058074375955, |
|
"grad_norm": 1.1150951385498047, |
|
"learning_rate": 3.535849056603774e-06, |
|
"loss": 0.1717, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.0942435048395314, |
|
"grad_norm": 1.0338242053985596, |
|
"learning_rate": 3.5320754716981136e-06, |
|
"loss": 0.1789, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.0962812022414672, |
|
"grad_norm": 1.0984159708023071, |
|
"learning_rate": 3.5283018867924527e-06, |
|
"loss": 0.1767, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.098318899643403, |
|
"grad_norm": 1.1370503902435303, |
|
"learning_rate": 3.5245283018867926e-06, |
|
"loss": 0.1863, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.1003565970453388, |
|
"grad_norm": 1.1123195886611938, |
|
"learning_rate": 3.5207547169811325e-06, |
|
"loss": 0.1733, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.1023942944472747, |
|
"grad_norm": 1.1519520282745361, |
|
"learning_rate": 3.516981132075472e-06, |
|
"loss": 0.1669, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.1044319918492105, |
|
"grad_norm": 1.1219109296798706, |
|
"learning_rate": 3.513207547169812e-06, |
|
"loss": 0.1788, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.1064696892511463, |
|
"grad_norm": 1.237865686416626, |
|
"learning_rate": 3.509433962264151e-06, |
|
"loss": 0.1776, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.108507386653082, |
|
"grad_norm": 1.0959861278533936, |
|
"learning_rate": 3.505660377358491e-06, |
|
"loss": 0.1773, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.1105450840550177, |
|
"grad_norm": 1.079746127128601, |
|
"learning_rate": 3.5018867924528304e-06, |
|
"loss": 0.1942, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.1125827814569536, |
|
"grad_norm": 1.1233259439468384, |
|
"learning_rate": 3.4981132075471703e-06, |
|
"loss": 0.1707, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.1146204788588894, |
|
"grad_norm": 1.2879219055175781, |
|
"learning_rate": 3.49433962264151e-06, |
|
"loss": 0.1746, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.1166581762608252, |
|
"grad_norm": 1.1267422437667847, |
|
"learning_rate": 3.4905660377358493e-06, |
|
"loss": 0.1782, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.118695873662761, |
|
"grad_norm": 1.397052526473999, |
|
"learning_rate": 3.486792452830189e-06, |
|
"loss": 0.1696, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.1207335710646968, |
|
"grad_norm": 1.3258302211761475, |
|
"learning_rate": 3.4830188679245287e-06, |
|
"loss": 0.1668, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.1227712684666327, |
|
"grad_norm": 1.225081205368042, |
|
"learning_rate": 3.479245283018868e-06, |
|
"loss": 0.179, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.1248089658685685, |
|
"grad_norm": 1.187245488166809, |
|
"learning_rate": 3.475471698113208e-06, |
|
"loss": 0.1739, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.1268466632705043, |
|
"grad_norm": 1.2275511026382446, |
|
"learning_rate": 3.471698113207547e-06, |
|
"loss": 0.1892, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.1288843606724401, |
|
"grad_norm": 1.1659022569656372, |
|
"learning_rate": 3.467924528301887e-06, |
|
"loss": 0.1801, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.130922058074376, |
|
"grad_norm": 1.3677842617034912, |
|
"learning_rate": 3.4641509433962266e-06, |
|
"loss": 0.1787, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.1329597554763118, |
|
"grad_norm": 1.2617255449295044, |
|
"learning_rate": 3.4603773584905665e-06, |
|
"loss": 0.1792, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.1349974528782476, |
|
"grad_norm": 1.1734035015106201, |
|
"learning_rate": 3.456603773584906e-06, |
|
"loss": 0.1735, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.1370351502801834, |
|
"grad_norm": 1.3135229349136353, |
|
"learning_rate": 3.4528301886792455e-06, |
|
"loss": 0.1844, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.1390728476821192, |
|
"grad_norm": 1.281538724899292, |
|
"learning_rate": 3.449056603773585e-06, |
|
"loss": 0.1811, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.141110545084055, |
|
"grad_norm": 1.1368190050125122, |
|
"learning_rate": 3.445283018867925e-06, |
|
"loss": 0.1633, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.143148242485991, |
|
"grad_norm": 1.0890092849731445, |
|
"learning_rate": 3.4415094339622644e-06, |
|
"loss": 0.178, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.1451859398879267, |
|
"grad_norm": 1.20881986618042, |
|
"learning_rate": 3.4377358490566043e-06, |
|
"loss": 0.1722, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.1472236372898625, |
|
"grad_norm": 1.1593676805496216, |
|
"learning_rate": 3.4339622641509434e-06, |
|
"loss": 0.1829, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.1492613346917984, |
|
"grad_norm": 1.090755820274353, |
|
"learning_rate": 3.4301886792452833e-06, |
|
"loss": 0.1787, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.1512990320937342, |
|
"grad_norm": 1.2112749814987183, |
|
"learning_rate": 3.4264150943396228e-06, |
|
"loss": 0.1801, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.15333672949567, |
|
"grad_norm": 1.119545340538025, |
|
"learning_rate": 3.4226415094339627e-06, |
|
"loss": 0.1821, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.1553744268976056, |
|
"grad_norm": 1.1820521354675293, |
|
"learning_rate": 3.4188679245283026e-06, |
|
"loss": 0.189, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.1574121242995414, |
|
"grad_norm": 1.2243889570236206, |
|
"learning_rate": 3.4150943396226417e-06, |
|
"loss": 0.1838, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.1594498217014773, |
|
"grad_norm": 1.0234663486480713, |
|
"learning_rate": 3.411320754716981e-06, |
|
"loss": 0.1767, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.161487519103413, |
|
"grad_norm": 1.209953784942627, |
|
"learning_rate": 3.407547169811321e-06, |
|
"loss": 0.1769, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.163525216505349, |
|
"grad_norm": 1.1745116710662842, |
|
"learning_rate": 3.403773584905661e-06, |
|
"loss": 0.1856, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.1655629139072847, |
|
"grad_norm": 1.071757435798645, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 0.1591, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.1676006113092205, |
|
"grad_norm": 1.150458574295044, |
|
"learning_rate": 3.3962264150943395e-06, |
|
"loss": 0.1776, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.1696383087111564, |
|
"grad_norm": 1.193291187286377, |
|
"learning_rate": 3.3924528301886795e-06, |
|
"loss": 0.175, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.1716760061130922, |
|
"grad_norm": 1.2312043905258179, |
|
"learning_rate": 3.3886792452830194e-06, |
|
"loss": 0.1906, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.173713703515028, |
|
"grad_norm": 1.1354984045028687, |
|
"learning_rate": 3.384905660377359e-06, |
|
"loss": 0.1753, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.1757514009169638, |
|
"grad_norm": 1.3425500392913818, |
|
"learning_rate": 3.3811320754716988e-06, |
|
"loss": 0.1876, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.1777890983188997, |
|
"grad_norm": 1.0738446712493896, |
|
"learning_rate": 3.377358490566038e-06, |
|
"loss": 0.1765, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.1798267957208355, |
|
"grad_norm": 1.1612354516983032, |
|
"learning_rate": 3.3735849056603773e-06, |
|
"loss": 0.1712, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.1818644931227713, |
|
"grad_norm": 1.2308764457702637, |
|
"learning_rate": 3.3698113207547173e-06, |
|
"loss": 0.182, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.1839021905247071, |
|
"grad_norm": 1.3299064636230469, |
|
"learning_rate": 3.366037735849057e-06, |
|
"loss": 0.1812, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.185939887926643, |
|
"grad_norm": 1.1064029932022095, |
|
"learning_rate": 3.3622641509433967e-06, |
|
"loss": 0.1853, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.1879775853285788, |
|
"grad_norm": 1.131239414215088, |
|
"learning_rate": 3.3584905660377357e-06, |
|
"loss": 0.1827, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.1900152827305146, |
|
"grad_norm": 1.1805070638656616, |
|
"learning_rate": 3.3547169811320756e-06, |
|
"loss": 0.1937, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.1920529801324504, |
|
"grad_norm": 1.2116690874099731, |
|
"learning_rate": 3.3509433962264156e-06, |
|
"loss": 0.197, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.194090677534386, |
|
"grad_norm": 1.3518807888031006, |
|
"learning_rate": 3.347169811320755e-06, |
|
"loss": 0.1765, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.1961283749363218, |
|
"grad_norm": 1.2591750621795654, |
|
"learning_rate": 3.343396226415095e-06, |
|
"loss": 0.1782, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.1981660723382577, |
|
"grad_norm": 1.1681146621704102, |
|
"learning_rate": 3.339622641509434e-06, |
|
"loss": 0.1811, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.2002037697401935, |
|
"grad_norm": 1.2340030670166016, |
|
"learning_rate": 3.335849056603774e-06, |
|
"loss": 0.1732, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.2022414671421293, |
|
"grad_norm": 1.2480478286743164, |
|
"learning_rate": 3.3320754716981134e-06, |
|
"loss": 0.1747, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.2042791645440651, |
|
"grad_norm": 1.2134257555007935, |
|
"learning_rate": 3.3283018867924534e-06, |
|
"loss": 0.1807, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.206316861946001, |
|
"grad_norm": 1.050817608833313, |
|
"learning_rate": 3.324528301886793e-06, |
|
"loss": 0.1725, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.2083545593479368, |
|
"grad_norm": 1.2634903192520142, |
|
"learning_rate": 3.3207547169811323e-06, |
|
"loss": 0.1883, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.2103922567498726, |
|
"grad_norm": 1.2350244522094727, |
|
"learning_rate": 3.316981132075472e-06, |
|
"loss": 0.1872, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.2124299541518084, |
|
"grad_norm": 1.232961893081665, |
|
"learning_rate": 3.3132075471698117e-06, |
|
"loss": 0.1654, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.2144676515537443, |
|
"grad_norm": 1.163649320602417, |
|
"learning_rate": 3.3094339622641512e-06, |
|
"loss": 0.1933, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.21650534895568, |
|
"grad_norm": 1.21866774559021, |
|
"learning_rate": 3.305660377358491e-06, |
|
"loss": 0.1818, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.218543046357616, |
|
"grad_norm": 1.1113258600234985, |
|
"learning_rate": 3.30188679245283e-06, |
|
"loss": 0.1755, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.2205807437595517, |
|
"grad_norm": 1.1248152256011963, |
|
"learning_rate": 3.29811320754717e-06, |
|
"loss": 0.1756, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.2226184411614875, |
|
"grad_norm": 1.1161712408065796, |
|
"learning_rate": 3.2943396226415096e-06, |
|
"loss": 0.1967, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.2246561385634234, |
|
"grad_norm": 1.1488161087036133, |
|
"learning_rate": 3.2905660377358495e-06, |
|
"loss": 0.1791, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.2266938359653592, |
|
"grad_norm": 1.2753115892410278, |
|
"learning_rate": 3.286792452830189e-06, |
|
"loss": 0.1739, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.228731533367295, |
|
"grad_norm": 1.1130990982055664, |
|
"learning_rate": 3.2830188679245285e-06, |
|
"loss": 0.1682, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.2307692307692308, |
|
"grad_norm": 1.1455460786819458, |
|
"learning_rate": 3.279245283018868e-06, |
|
"loss": 0.1854, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.2328069281711667, |
|
"grad_norm": 1.1896706819534302, |
|
"learning_rate": 3.275471698113208e-06, |
|
"loss": 0.1901, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.2348446255731025, |
|
"grad_norm": 1.132242202758789, |
|
"learning_rate": 3.2716981132075474e-06, |
|
"loss": 0.1705, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.2368823229750383, |
|
"grad_norm": 1.1296707391738892, |
|
"learning_rate": 3.2679245283018873e-06, |
|
"loss": 0.1875, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.2389200203769741, |
|
"grad_norm": 1.2837047576904297, |
|
"learning_rate": 3.2641509433962264e-06, |
|
"loss": 0.1862, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.24095771777891, |
|
"grad_norm": 1.2516281604766846, |
|
"learning_rate": 3.2603773584905663e-06, |
|
"loss": 0.1763, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.2429954151808456, |
|
"grad_norm": 1.2051138877868652, |
|
"learning_rate": 3.256603773584906e-06, |
|
"loss": 0.1681, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.2450331125827814, |
|
"grad_norm": 1.1206097602844238, |
|
"learning_rate": 3.2528301886792457e-06, |
|
"loss": 0.1895, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.2470708099847172, |
|
"grad_norm": 1.085570216178894, |
|
"learning_rate": 3.2490566037735848e-06, |
|
"loss": 0.1674, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.249108507386653, |
|
"grad_norm": 1.1711559295654297, |
|
"learning_rate": 3.2452830188679247e-06, |
|
"loss": 0.1847, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.2511462047885888, |
|
"grad_norm": 1.2569772005081177, |
|
"learning_rate": 3.241509433962264e-06, |
|
"loss": 0.185, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.2531839021905247, |
|
"grad_norm": 1.265191912651062, |
|
"learning_rate": 3.237735849056604e-06, |
|
"loss": 0.188, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.2552215995924605, |
|
"grad_norm": 1.2143467664718628, |
|
"learning_rate": 3.233962264150944e-06, |
|
"loss": 0.189, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.2572592969943963, |
|
"grad_norm": 1.3829542398452759, |
|
"learning_rate": 3.230188679245283e-06, |
|
"loss": 0.174, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.2592969943963321, |
|
"grad_norm": 1.2590124607086182, |
|
"learning_rate": 3.2264150943396226e-06, |
|
"loss": 0.1921, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.261334691798268, |
|
"grad_norm": 1.125143051147461, |
|
"learning_rate": 3.2226415094339625e-06, |
|
"loss": 0.1788, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.2633723892002038, |
|
"grad_norm": 1.136713981628418, |
|
"learning_rate": 3.2188679245283024e-06, |
|
"loss": 0.1665, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.2654100866021396, |
|
"grad_norm": 1.1080840826034546, |
|
"learning_rate": 3.215094339622642e-06, |
|
"loss": 0.1785, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.2674477840040754, |
|
"grad_norm": 1.0990139245986938, |
|
"learning_rate": 3.211320754716981e-06, |
|
"loss": 0.1759, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.2694854814060113, |
|
"grad_norm": 1.2469940185546875, |
|
"learning_rate": 3.207547169811321e-06, |
|
"loss": 0.1738, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.271523178807947, |
|
"grad_norm": 1.163061261177063, |
|
"learning_rate": 3.2037735849056608e-06, |
|
"loss": 0.1881, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.273560876209883, |
|
"grad_norm": 1.1554782390594482, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 0.1728, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.2755985736118187, |
|
"grad_norm": 1.1074477434158325, |
|
"learning_rate": 3.19622641509434e-06, |
|
"loss": 0.1762, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.2776362710137543, |
|
"grad_norm": 1.1363695859909058, |
|
"learning_rate": 3.1924528301886793e-06, |
|
"loss": 0.172, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.2796739684156901, |
|
"grad_norm": 1.0740599632263184, |
|
"learning_rate": 3.188679245283019e-06, |
|
"loss": 0.1634, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.281711665817626, |
|
"grad_norm": 1.0671052932739258, |
|
"learning_rate": 3.1849056603773587e-06, |
|
"loss": 0.1749, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.2837493632195618, |
|
"grad_norm": 1.1366360187530518, |
|
"learning_rate": 3.1811320754716986e-06, |
|
"loss": 0.1685, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.2857870606214976, |
|
"grad_norm": 1.250622272491455, |
|
"learning_rate": 3.177358490566038e-06, |
|
"loss": 0.1758, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.2878247580234334, |
|
"grad_norm": 1.145407795906067, |
|
"learning_rate": 3.1735849056603776e-06, |
|
"loss": 0.1881, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.2898624554253693, |
|
"grad_norm": 1.1561169624328613, |
|
"learning_rate": 3.169811320754717e-06, |
|
"loss": 0.1846, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.291900152827305, |
|
"grad_norm": 1.1274852752685547, |
|
"learning_rate": 3.166037735849057e-06, |
|
"loss": 0.1765, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.293937850229241, |
|
"grad_norm": 1.2915289402008057, |
|
"learning_rate": 3.1622641509433965e-06, |
|
"loss": 0.1767, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.2959755476311767, |
|
"grad_norm": 1.1345237493515015, |
|
"learning_rate": 3.1584905660377364e-06, |
|
"loss": 0.1689, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.2980132450331126, |
|
"grad_norm": 1.2380014657974243, |
|
"learning_rate": 3.1547169811320754e-06, |
|
"loss": 0.19, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.3000509424350484, |
|
"grad_norm": 1.1787712574005127, |
|
"learning_rate": 3.1509433962264154e-06, |
|
"loss": 0.187, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.3020886398369842, |
|
"grad_norm": 1.172777771949768, |
|
"learning_rate": 3.147169811320755e-06, |
|
"loss": 0.1951, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.30412633723892, |
|
"grad_norm": 1.1491492986679077, |
|
"learning_rate": 3.1433962264150948e-06, |
|
"loss": 0.1651, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.3061640346408558, |
|
"grad_norm": 1.1255732774734497, |
|
"learning_rate": 3.1396226415094343e-06, |
|
"loss": 0.1838, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.3082017320427917, |
|
"grad_norm": 1.2315205335617065, |
|
"learning_rate": 3.1358490566037737e-06, |
|
"loss": 0.1785, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.3102394294447275, |
|
"grad_norm": 1.1849606037139893, |
|
"learning_rate": 3.1320754716981132e-06, |
|
"loss": 0.177, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.3122771268466633, |
|
"grad_norm": 1.1372692584991455, |
|
"learning_rate": 3.128301886792453e-06, |
|
"loss": 0.1747, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.3143148242485991, |
|
"grad_norm": 1.2609679698944092, |
|
"learning_rate": 3.1245283018867926e-06, |
|
"loss": 0.1836, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.316352521650535, |
|
"grad_norm": 1.179504156112671, |
|
"learning_rate": 3.1207547169811326e-06, |
|
"loss": 0.1771, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.3183902190524708, |
|
"grad_norm": 1.2097948789596558, |
|
"learning_rate": 3.1169811320754716e-06, |
|
"loss": 0.1873, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.3204279164544066, |
|
"grad_norm": 1.1823457479476929, |
|
"learning_rate": 3.1132075471698115e-06, |
|
"loss": 0.1893, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.3224656138563424, |
|
"grad_norm": 1.1036756038665771, |
|
"learning_rate": 3.109433962264151e-06, |
|
"loss": 0.1651, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.3245033112582782, |
|
"grad_norm": 1.1787657737731934, |
|
"learning_rate": 3.105660377358491e-06, |
|
"loss": 0.1675, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.326541008660214, |
|
"grad_norm": 1.2578370571136475, |
|
"learning_rate": 3.1018867924528304e-06, |
|
"loss": 0.1854, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.32857870606215, |
|
"grad_norm": 1.136648178100586, |
|
"learning_rate": 3.09811320754717e-06, |
|
"loss": 0.1744, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.3306164034640857, |
|
"grad_norm": 1.1764239072799683, |
|
"learning_rate": 3.0943396226415094e-06, |
|
"loss": 0.1721, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.3326541008660213, |
|
"grad_norm": 1.0723998546600342, |
|
"learning_rate": 3.0905660377358493e-06, |
|
"loss": 0.1719, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.3346917982679571, |
|
"grad_norm": 1.1238343715667725, |
|
"learning_rate": 3.086792452830189e-06, |
|
"loss": 0.1752, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.336729495669893, |
|
"grad_norm": 1.1110343933105469, |
|
"learning_rate": 3.0830188679245287e-06, |
|
"loss": 0.1806, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.3387671930718288, |
|
"grad_norm": 1.1625982522964478, |
|
"learning_rate": 3.079245283018868e-06, |
|
"loss": 0.1919, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.3408048904737646, |
|
"grad_norm": 1.2139103412628174, |
|
"learning_rate": 3.0754716981132077e-06, |
|
"loss": 0.1807, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.3428425878757004, |
|
"grad_norm": 1.2624015808105469, |
|
"learning_rate": 3.071698113207547e-06, |
|
"loss": 0.1774, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.3448802852776363, |
|
"grad_norm": 1.1833164691925049, |
|
"learning_rate": 3.067924528301887e-06, |
|
"loss": 0.1765, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.346917982679572, |
|
"grad_norm": 1.3421837091445923, |
|
"learning_rate": 3.064150943396227e-06, |
|
"loss": 0.1861, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.348955680081508, |
|
"grad_norm": 1.1380257606506348, |
|
"learning_rate": 3.060377358490566e-06, |
|
"loss": 0.1642, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.3509933774834437, |
|
"grad_norm": 1.2193323373794556, |
|
"learning_rate": 3.0566037735849056e-06, |
|
"loss": 0.1804, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.3530310748853795, |
|
"grad_norm": 1.0917553901672363, |
|
"learning_rate": 3.0528301886792455e-06, |
|
"loss": 0.1761, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.3550687722873154, |
|
"grad_norm": 1.252640724182129, |
|
"learning_rate": 3.0490566037735854e-06, |
|
"loss": 0.1892, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.3571064696892512, |
|
"grad_norm": 1.2436408996582031, |
|
"learning_rate": 3.045283018867925e-06, |
|
"loss": 0.1865, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.359144167091187, |
|
"grad_norm": 1.0737476348876953, |
|
"learning_rate": 3.041509433962264e-06, |
|
"loss": 0.173, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.3611818644931228, |
|
"grad_norm": 1.3767677545547485, |
|
"learning_rate": 3.037735849056604e-06, |
|
"loss": 0.1855, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.3632195618950587, |
|
"grad_norm": 1.1147671937942505, |
|
"learning_rate": 3.033962264150944e-06, |
|
"loss": 0.175, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.3652572592969943, |
|
"grad_norm": 1.2812708616256714, |
|
"learning_rate": 3.0301886792452833e-06, |
|
"loss": 0.1844, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.36729495669893, |
|
"grad_norm": 1.028883695602417, |
|
"learning_rate": 3.0264150943396232e-06, |
|
"loss": 0.1641, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.369332654100866, |
|
"grad_norm": 1.2508153915405273, |
|
"learning_rate": 3.0226415094339623e-06, |
|
"loss": 0.1884, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.3713703515028017, |
|
"grad_norm": 1.2635626792907715, |
|
"learning_rate": 3.018867924528302e-06, |
|
"loss": 0.1835, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.3734080489047376, |
|
"grad_norm": 1.1258081197738647, |
|
"learning_rate": 3.0150943396226417e-06, |
|
"loss": 0.1694, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.3754457463066734, |
|
"grad_norm": 1.1584776639938354, |
|
"learning_rate": 3.0113207547169816e-06, |
|
"loss": 0.1928, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.3774834437086092, |
|
"grad_norm": 1.1394814252853394, |
|
"learning_rate": 3.007547169811321e-06, |
|
"loss": 0.1698, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.379521141110545, |
|
"grad_norm": 1.1019212007522583, |
|
"learning_rate": 3.0037735849056606e-06, |
|
"loss": 0.1604, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.3815588385124808, |
|
"grad_norm": 1.262918472290039, |
|
"learning_rate": 3e-06, |
|
"loss": 0.1713, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.3835965359144167, |
|
"grad_norm": 1.1134512424468994, |
|
"learning_rate": 2.99622641509434e-06, |
|
"loss": 0.1738, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.3856342333163525, |
|
"grad_norm": 1.1910215616226196, |
|
"learning_rate": 2.9924528301886795e-06, |
|
"loss": 0.1854, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.3876719307182883, |
|
"grad_norm": 1.0705041885375977, |
|
"learning_rate": 2.9886792452830194e-06, |
|
"loss": 0.1679, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.3897096281202241, |
|
"grad_norm": 1.0849546194076538, |
|
"learning_rate": 2.9849056603773585e-06, |
|
"loss": 0.1732, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.39174732552216, |
|
"grad_norm": 1.1088389158248901, |
|
"learning_rate": 2.9811320754716984e-06, |
|
"loss": 0.1832, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.3937850229240958, |
|
"grad_norm": 1.1701173782348633, |
|
"learning_rate": 2.977358490566038e-06, |
|
"loss": 0.1832, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.3958227203260316, |
|
"grad_norm": 1.1918519735336304, |
|
"learning_rate": 2.9735849056603778e-06, |
|
"loss": 0.1863, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.3978604177279674, |
|
"grad_norm": 1.207116723060608, |
|
"learning_rate": 2.9698113207547173e-06, |
|
"loss": 0.1806, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.3998981151299033, |
|
"grad_norm": 1.2102634906768799, |
|
"learning_rate": 2.9660377358490568e-06, |
|
"loss": 0.1759, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.401935812531839, |
|
"grad_norm": 1.1316732168197632, |
|
"learning_rate": 2.9622641509433963e-06, |
|
"loss": 0.17, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.403973509933775, |
|
"grad_norm": 1.204567790031433, |
|
"learning_rate": 2.958490566037736e-06, |
|
"loss": 0.1908, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.4060112073357107, |
|
"grad_norm": 1.0931925773620605, |
|
"learning_rate": 2.9547169811320757e-06, |
|
"loss": 0.1784, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.4080489047376465, |
|
"grad_norm": 1.2366472482681274, |
|
"learning_rate": 2.9509433962264156e-06, |
|
"loss": 0.2053, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.4100866021395824, |
|
"grad_norm": 1.169756531715393, |
|
"learning_rate": 2.9471698113207546e-06, |
|
"loss": 0.1803, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.4121242995415182, |
|
"grad_norm": 1.271429419517517, |
|
"learning_rate": 2.9433962264150946e-06, |
|
"loss": 0.1867, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.414161996943454, |
|
"grad_norm": 1.2226650714874268, |
|
"learning_rate": 2.939622641509434e-06, |
|
"loss": 0.1643, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.4161996943453898, |
|
"grad_norm": 1.2417409420013428, |
|
"learning_rate": 2.935849056603774e-06, |
|
"loss": 0.1897, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.4182373917473257, |
|
"grad_norm": 1.24673593044281, |
|
"learning_rate": 2.932075471698114e-06, |
|
"loss": 0.1648, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.4202750891492613, |
|
"grad_norm": 1.336515188217163, |
|
"learning_rate": 2.928301886792453e-06, |
|
"loss": 0.1913, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.422312786551197, |
|
"grad_norm": 1.1495544910430908, |
|
"learning_rate": 2.9245283018867924e-06, |
|
"loss": 0.1825, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.424350483953133, |
|
"grad_norm": 1.181207537651062, |
|
"learning_rate": 2.9207547169811324e-06, |
|
"loss": 0.1814, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.4263881813550687, |
|
"grad_norm": 1.2883107662200928, |
|
"learning_rate": 2.9169811320754723e-06, |
|
"loss": 0.1859, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.4284258787570046, |
|
"grad_norm": 1.14235520362854, |
|
"learning_rate": 2.9132075471698118e-06, |
|
"loss": 0.1846, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.4304635761589404, |
|
"grad_norm": 1.0994147062301636, |
|
"learning_rate": 2.909433962264151e-06, |
|
"loss": 0.18, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.4325012735608762, |
|
"grad_norm": 1.2511541843414307, |
|
"learning_rate": 2.9056603773584907e-06, |
|
"loss": 0.1759, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.434538970962812, |
|
"grad_norm": 1.0954980850219727, |
|
"learning_rate": 2.9018867924528307e-06, |
|
"loss": 0.1724, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.4365766683647478, |
|
"grad_norm": 1.3084522485733032, |
|
"learning_rate": 2.89811320754717e-06, |
|
"loss": 0.179, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.4386143657666837, |
|
"grad_norm": 1.1592984199523926, |
|
"learning_rate": 2.89433962264151e-06, |
|
"loss": 0.1798, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.4406520631686195, |
|
"grad_norm": 1.1409646272659302, |
|
"learning_rate": 2.890566037735849e-06, |
|
"loss": 0.175, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.4426897605705553, |
|
"grad_norm": 1.3026984930038452, |
|
"learning_rate": 2.886792452830189e-06, |
|
"loss": 0.1801, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.4447274579724911, |
|
"grad_norm": 1.115729570388794, |
|
"learning_rate": 2.8830188679245285e-06, |
|
"loss": 0.188, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.446765155374427, |
|
"grad_norm": 1.3142112493515015, |
|
"learning_rate": 2.8792452830188684e-06, |
|
"loss": 0.187, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.4488028527763628, |
|
"grad_norm": 1.2339842319488525, |
|
"learning_rate": 2.875471698113208e-06, |
|
"loss": 0.1614, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.4508405501782986, |
|
"grad_norm": 1.2981687784194946, |
|
"learning_rate": 2.871698113207547e-06, |
|
"loss": 0.1688, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.4528782475802342, |
|
"grad_norm": 1.1264586448669434, |
|
"learning_rate": 2.867924528301887e-06, |
|
"loss": 0.1754, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.45491594498217, |
|
"grad_norm": 1.1794300079345703, |
|
"learning_rate": 2.864150943396227e-06, |
|
"loss": 0.1876, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.4569536423841059, |
|
"grad_norm": 1.0934234857559204, |
|
"learning_rate": 2.8603773584905663e-06, |
|
"loss": 0.18, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.4589913397860417, |
|
"grad_norm": 1.1383419036865234, |
|
"learning_rate": 2.8566037735849062e-06, |
|
"loss": 0.1812, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.4610290371879775, |
|
"grad_norm": 1.1334176063537598, |
|
"learning_rate": 2.8528301886792453e-06, |
|
"loss": 0.1699, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.4630667345899133, |
|
"grad_norm": 1.2105752229690552, |
|
"learning_rate": 2.8490566037735852e-06, |
|
"loss": 0.1886, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.4651044319918491, |
|
"grad_norm": 1.1222751140594482, |
|
"learning_rate": 2.8452830188679247e-06, |
|
"loss": 0.1677, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.467142129393785, |
|
"grad_norm": 1.0429809093475342, |
|
"learning_rate": 2.8415094339622646e-06, |
|
"loss": 0.1801, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.4691798267957208, |
|
"grad_norm": 1.1673039197921753, |
|
"learning_rate": 2.837735849056604e-06, |
|
"loss": 0.1824, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.4712175241976566, |
|
"grad_norm": 1.2965126037597656, |
|
"learning_rate": 2.8339622641509436e-06, |
|
"loss": 0.1789, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.4732552215995924, |
|
"grad_norm": 1.1965491771697998, |
|
"learning_rate": 2.830188679245283e-06, |
|
"loss": 0.1875, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.4752929190015283, |
|
"grad_norm": 1.1529309749603271, |
|
"learning_rate": 2.826415094339623e-06, |
|
"loss": 0.1846, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.477330616403464, |
|
"grad_norm": 1.1195148229599, |
|
"learning_rate": 2.8226415094339625e-06, |
|
"loss": 0.1655, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.4793683138054, |
|
"grad_norm": 1.2534137964248657, |
|
"learning_rate": 2.8188679245283024e-06, |
|
"loss": 0.1779, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.4814060112073357, |
|
"grad_norm": 1.1430234909057617, |
|
"learning_rate": 2.8150943396226415e-06, |
|
"loss": 0.1855, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.4834437086092715, |
|
"grad_norm": 1.1733477115631104, |
|
"learning_rate": 2.8113207547169814e-06, |
|
"loss": 0.1818, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.4854814060112074, |
|
"grad_norm": 1.2729791402816772, |
|
"learning_rate": 2.807547169811321e-06, |
|
"loss": 0.1845, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.4875191034131432, |
|
"grad_norm": 1.2047133445739746, |
|
"learning_rate": 2.803773584905661e-06, |
|
"loss": 0.1853, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.489556800815079, |
|
"grad_norm": 1.0154218673706055, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 0.1845, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.4915944982170148, |
|
"grad_norm": 1.0939674377441406, |
|
"learning_rate": 2.7962264150943398e-06, |
|
"loss": 0.1856, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.4936321956189507, |
|
"grad_norm": 1.1324870586395264, |
|
"learning_rate": 2.7924528301886793e-06, |
|
"loss": 0.1755, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.4956698930208865, |
|
"grad_norm": 1.4036580324172974, |
|
"learning_rate": 2.788679245283019e-06, |
|
"loss": 0.2023, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.4977075904228223, |
|
"grad_norm": 1.1628963947296143, |
|
"learning_rate": 2.7849056603773587e-06, |
|
"loss": 0.1787, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.4997452878247581, |
|
"grad_norm": 1.0612685680389404, |
|
"learning_rate": 2.7811320754716986e-06, |
|
"loss": 0.1709, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.501782985226694, |
|
"grad_norm": 1.1758002042770386, |
|
"learning_rate": 2.7773584905660377e-06, |
|
"loss": 0.1844, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.5038206826286298, |
|
"grad_norm": 1.1747825145721436, |
|
"learning_rate": 2.7735849056603776e-06, |
|
"loss": 0.1759, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.5058583800305656, |
|
"grad_norm": 1.1918827295303345, |
|
"learning_rate": 2.769811320754717e-06, |
|
"loss": 0.1685, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.5078960774325014, |
|
"grad_norm": 1.1047258377075195, |
|
"learning_rate": 2.766037735849057e-06, |
|
"loss": 0.1826, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.5099337748344372, |
|
"grad_norm": 1.209409236907959, |
|
"learning_rate": 2.762264150943397e-06, |
|
"loss": 0.1703, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.5119714722363728, |
|
"grad_norm": 1.1031354665756226, |
|
"learning_rate": 2.758490566037736e-06, |
|
"loss": 0.1766, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.5140091696383087, |
|
"grad_norm": 1.2434014081954956, |
|
"learning_rate": 2.7547169811320755e-06, |
|
"loss": 0.1844, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.5160468670402445, |
|
"grad_norm": 1.177281379699707, |
|
"learning_rate": 2.7509433962264154e-06, |
|
"loss": 0.186, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.5180845644421803, |
|
"grad_norm": 1.0548818111419678, |
|
"learning_rate": 2.7471698113207553e-06, |
|
"loss": 0.1675, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.5201222618441161, |
|
"grad_norm": 1.1306318044662476, |
|
"learning_rate": 2.7433962264150944e-06, |
|
"loss": 0.1713, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.522159959246052, |
|
"grad_norm": 1.205263376235962, |
|
"learning_rate": 2.739622641509434e-06, |
|
"loss": 0.1906, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.5241976566479878, |
|
"grad_norm": 2.5892493724823, |
|
"learning_rate": 2.7358490566037738e-06, |
|
"loss": 0.1757, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.5262353540499236, |
|
"grad_norm": 1.0715084075927734, |
|
"learning_rate": 2.7320754716981137e-06, |
|
"loss": 0.173, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.5282730514518594, |
|
"grad_norm": 1.231529712677002, |
|
"learning_rate": 2.728301886792453e-06, |
|
"loss": 0.1701, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.5303107488537953, |
|
"grad_norm": 1.2592768669128418, |
|
"learning_rate": 2.7245283018867922e-06, |
|
"loss": 0.1774, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.532348446255731, |
|
"grad_norm": 1.2342033386230469, |
|
"learning_rate": 2.720754716981132e-06, |
|
"loss": 0.17, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.5343861436576667, |
|
"grad_norm": 1.1225703954696655, |
|
"learning_rate": 2.716981132075472e-06, |
|
"loss": 0.1786, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.5364238410596025, |
|
"grad_norm": 1.204437494277954, |
|
"learning_rate": 2.7132075471698116e-06, |
|
"loss": 0.182, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 1.152274489402771, |
|
"learning_rate": 2.7094339622641515e-06, |
|
"loss": 0.1845, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.5404992358634741, |
|
"grad_norm": 1.268399715423584, |
|
"learning_rate": 2.7056603773584905e-06, |
|
"loss": 0.1866, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.54253693326541, |
|
"grad_norm": 1.3325903415679932, |
|
"learning_rate": 2.7018867924528304e-06, |
|
"loss": 0.1788, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.5445746306673458, |
|
"grad_norm": 1.164884090423584, |
|
"learning_rate": 2.69811320754717e-06, |
|
"loss": 0.1863, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.5466123280692816, |
|
"grad_norm": 1.1347957849502563, |
|
"learning_rate": 2.69433962264151e-06, |
|
"loss": 0.1902, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.5486500254712174, |
|
"grad_norm": 1.1705092191696167, |
|
"learning_rate": 2.6905660377358493e-06, |
|
"loss": 0.1757, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.5506877228731533, |
|
"grad_norm": 1.1735482215881348, |
|
"learning_rate": 2.686792452830189e-06, |
|
"loss": 0.1743, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.552725420275089, |
|
"grad_norm": 1.1496127843856812, |
|
"learning_rate": 2.6830188679245283e-06, |
|
"loss": 0.1704, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.554763117677025, |
|
"grad_norm": 1.1327245235443115, |
|
"learning_rate": 2.6792452830188682e-06, |
|
"loss": 0.1687, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.5568008150789607, |
|
"grad_norm": 1.235737919807434, |
|
"learning_rate": 2.6754716981132077e-06, |
|
"loss": 0.1699, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.5588385124808966, |
|
"grad_norm": 1.0961453914642334, |
|
"learning_rate": 2.6716981132075476e-06, |
|
"loss": 0.174, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.5608762098828324, |
|
"grad_norm": 1.1706377267837524, |
|
"learning_rate": 2.6679245283018867e-06, |
|
"loss": 0.1679, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.5629139072847682, |
|
"grad_norm": 1.314253330230713, |
|
"learning_rate": 2.6641509433962266e-06, |
|
"loss": 0.1859, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.564951604686704, |
|
"grad_norm": 1.0271321535110474, |
|
"learning_rate": 2.660377358490566e-06, |
|
"loss": 0.1717, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.5669893020886398, |
|
"grad_norm": 1.11105215549469, |
|
"learning_rate": 2.656603773584906e-06, |
|
"loss": 0.1699, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.5690269994905757, |
|
"grad_norm": 1.2342256307601929, |
|
"learning_rate": 2.6528301886792455e-06, |
|
"loss": 0.1836, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.5710646968925115, |
|
"grad_norm": 1.208130121231079, |
|
"learning_rate": 2.649056603773585e-06, |
|
"loss": 0.1708, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.5731023942944473, |
|
"grad_norm": 1.235351324081421, |
|
"learning_rate": 2.6452830188679245e-06, |
|
"loss": 0.1976, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.5751400916963831, |
|
"grad_norm": 1.0710421800613403, |
|
"learning_rate": 2.6415094339622644e-06, |
|
"loss": 0.1734, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.577177789098319, |
|
"grad_norm": 0.9788026213645935, |
|
"learning_rate": 2.637735849056604e-06, |
|
"loss": 0.1701, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.5792154865002548, |
|
"grad_norm": 1.1931087970733643, |
|
"learning_rate": 2.633962264150944e-06, |
|
"loss": 0.1667, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.5812531839021906, |
|
"grad_norm": 1.242144227027893, |
|
"learning_rate": 2.630188679245283e-06, |
|
"loss": 0.1923, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.5832908813041264, |
|
"grad_norm": 1.2944048643112183, |
|
"learning_rate": 2.626415094339623e-06, |
|
"loss": 0.1768, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.5853285787060623, |
|
"grad_norm": 1.0808852910995483, |
|
"learning_rate": 2.6226415094339623e-06, |
|
"loss": 0.1721, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.587366276107998, |
|
"grad_norm": 1.147532343864441, |
|
"learning_rate": 2.6188679245283022e-06, |
|
"loss": 0.1618, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.589403973509934, |
|
"grad_norm": 1.2777063846588135, |
|
"learning_rate": 2.615094339622642e-06, |
|
"loss": 0.1831, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.5914416709118697, |
|
"grad_norm": 1.1522384881973267, |
|
"learning_rate": 2.611320754716981e-06, |
|
"loss": 0.1725, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.5934793683138055, |
|
"grad_norm": 1.1645333766937256, |
|
"learning_rate": 2.6075471698113207e-06, |
|
"loss": 0.1724, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.5955170657157414, |
|
"grad_norm": 1.1945953369140625, |
|
"learning_rate": 2.6037735849056606e-06, |
|
"loss": 0.182, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.5975547631176772, |
|
"grad_norm": 1.2776046991348267, |
|
"learning_rate": 2.6e-06, |
|
"loss": 0.1783, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.5995924605196128, |
|
"grad_norm": 1.0407108068466187, |
|
"learning_rate": 2.59622641509434e-06, |
|
"loss": 0.1651, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.6016301579215486, |
|
"grad_norm": 1.1741459369659424, |
|
"learning_rate": 2.592452830188679e-06, |
|
"loss": 0.1759, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.6036678553234844, |
|
"grad_norm": 1.1339528560638428, |
|
"learning_rate": 2.588679245283019e-06, |
|
"loss": 0.1753, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.6057055527254203, |
|
"grad_norm": 1.5073323249816895, |
|
"learning_rate": 2.5849056603773585e-06, |
|
"loss": 0.1828, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.607743250127356, |
|
"grad_norm": 1.097970962524414, |
|
"learning_rate": 2.5811320754716984e-06, |
|
"loss": 0.1709, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.609780947529292, |
|
"grad_norm": 1.0759773254394531, |
|
"learning_rate": 2.5773584905660383e-06, |
|
"loss": 0.1562, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.6118186449312277, |
|
"grad_norm": 1.1199358701705933, |
|
"learning_rate": 2.5735849056603774e-06, |
|
"loss": 0.1751, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.6138563423331636, |
|
"grad_norm": 1.162474513053894, |
|
"learning_rate": 2.569811320754717e-06, |
|
"loss": 0.1691, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.6158940397350994, |
|
"grad_norm": 1.170835256576538, |
|
"learning_rate": 2.5660377358490568e-06, |
|
"loss": 0.179, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.6179317371370352, |
|
"grad_norm": 1.087983250617981, |
|
"learning_rate": 2.5622641509433967e-06, |
|
"loss": 0.1736, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.6199694345389708, |
|
"grad_norm": 1.1620844602584839, |
|
"learning_rate": 2.558490566037736e-06, |
|
"loss": 0.1815, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.6220071319409066, |
|
"grad_norm": 1.1823047399520874, |
|
"learning_rate": 2.5547169811320753e-06, |
|
"loss": 0.1797, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.6240448293428424, |
|
"grad_norm": 1.1422289609909058, |
|
"learning_rate": 2.550943396226415e-06, |
|
"loss": 0.1812, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.6260825267447783, |
|
"grad_norm": 1.2025611400604248, |
|
"learning_rate": 2.547169811320755e-06, |
|
"loss": 0.1807, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.628120224146714, |
|
"grad_norm": 1.140370488166809, |
|
"learning_rate": 2.5433962264150946e-06, |
|
"loss": 0.1782, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.63015792154865, |
|
"grad_norm": 1.1452966928482056, |
|
"learning_rate": 2.5396226415094345e-06, |
|
"loss": 0.1724, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.6321956189505857, |
|
"grad_norm": 1.217185616493225, |
|
"learning_rate": 2.5358490566037736e-06, |
|
"loss": 0.1807, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.6342333163525216, |
|
"grad_norm": 1.0574156045913696, |
|
"learning_rate": 2.5320754716981135e-06, |
|
"loss": 0.1694, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.6362710137544574, |
|
"grad_norm": 1.015283226966858, |
|
"learning_rate": 2.528301886792453e-06, |
|
"loss": 0.1713, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.6383087111563932, |
|
"grad_norm": 1.1992040872573853, |
|
"learning_rate": 2.524528301886793e-06, |
|
"loss": 0.1844, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.640346408558329, |
|
"grad_norm": 1.2918540239334106, |
|
"learning_rate": 2.5207547169811324e-06, |
|
"loss": 0.1813, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.6423841059602649, |
|
"grad_norm": 1.1141362190246582, |
|
"learning_rate": 2.516981132075472e-06, |
|
"loss": 0.1763, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.6444218033622007, |
|
"grad_norm": 1.0930787324905396, |
|
"learning_rate": 2.5132075471698114e-06, |
|
"loss": 0.184, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.6464595007641365, |
|
"grad_norm": 1.1243940591812134, |
|
"learning_rate": 2.5094339622641513e-06, |
|
"loss": 0.1821, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.6484971981660723, |
|
"grad_norm": 1.1842948198318481, |
|
"learning_rate": 2.5056603773584908e-06, |
|
"loss": 0.1684, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.6505348955680081, |
|
"grad_norm": 1.2824788093566895, |
|
"learning_rate": 2.5018867924528307e-06, |
|
"loss": 0.182, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.652572592969944, |
|
"grad_norm": 1.1476082801818848, |
|
"learning_rate": 2.49811320754717e-06, |
|
"loss": 0.1847, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.6546102903718798, |
|
"grad_norm": 1.1569533348083496, |
|
"learning_rate": 2.4943396226415097e-06, |
|
"loss": 0.1815, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.6566479877738156, |
|
"grad_norm": 1.1782304048538208, |
|
"learning_rate": 2.490566037735849e-06, |
|
"loss": 0.1754, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.6586856851757514, |
|
"grad_norm": 1.1351999044418335, |
|
"learning_rate": 2.486792452830189e-06, |
|
"loss": 0.189, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.6607233825776873, |
|
"grad_norm": 1.1230946779251099, |
|
"learning_rate": 2.4830188679245285e-06, |
|
"loss": 0.1781, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.662761079979623, |
|
"grad_norm": 1.062568187713623, |
|
"learning_rate": 2.479245283018868e-06, |
|
"loss": 0.1665, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.664798777381559, |
|
"grad_norm": 1.1602753400802612, |
|
"learning_rate": 2.4754716981132075e-06, |
|
"loss": 0.1738, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.6668364747834947, |
|
"grad_norm": 1.1816747188568115, |
|
"learning_rate": 2.4716981132075474e-06, |
|
"loss": 0.1675, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.6688741721854305, |
|
"grad_norm": 1.1682571172714233, |
|
"learning_rate": 2.467924528301887e-06, |
|
"loss": 0.1657, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.6709118695873664, |
|
"grad_norm": 1.0787543058395386, |
|
"learning_rate": 2.4641509433962264e-06, |
|
"loss": 0.1717, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.6729495669893022, |
|
"grad_norm": 1.1307450532913208, |
|
"learning_rate": 2.4603773584905663e-06, |
|
"loss": 0.1708, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.674987264391238, |
|
"grad_norm": 1.1192117929458618, |
|
"learning_rate": 2.456603773584906e-06, |
|
"loss": 0.1799, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.6770249617931738, |
|
"grad_norm": 1.1960910558700562, |
|
"learning_rate": 2.4528301886792453e-06, |
|
"loss": 0.1703, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.6790626591951097, |
|
"grad_norm": 1.1331156492233276, |
|
"learning_rate": 2.4490566037735852e-06, |
|
"loss": 0.1593, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.6811003565970455, |
|
"grad_norm": 1.2134394645690918, |
|
"learning_rate": 2.4452830188679247e-06, |
|
"loss": 0.1964, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.6831380539989813, |
|
"grad_norm": 1.178653597831726, |
|
"learning_rate": 2.4415094339622642e-06, |
|
"loss": 0.1864, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.685175751400917, |
|
"grad_norm": 1.0972850322723389, |
|
"learning_rate": 2.4377358490566037e-06, |
|
"loss": 0.1637, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.6872134488028527, |
|
"grad_norm": 1.0110701322555542, |
|
"learning_rate": 2.4339622641509436e-06, |
|
"loss": 0.1758, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.6892511462047886, |
|
"grad_norm": 1.0509426593780518, |
|
"learning_rate": 2.4301886792452835e-06, |
|
"loss": 0.1708, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.6912888436067244, |
|
"grad_norm": 1.247532844543457, |
|
"learning_rate": 2.4264150943396226e-06, |
|
"loss": 0.1754, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.6933265410086602, |
|
"grad_norm": 1.1235079765319824, |
|
"learning_rate": 2.4226415094339625e-06, |
|
"loss": 0.1699, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.695364238410596, |
|
"grad_norm": 1.2192139625549316, |
|
"learning_rate": 2.418867924528302e-06, |
|
"loss": 0.1851, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.6974019358125318, |
|
"grad_norm": 1.2487667798995972, |
|
"learning_rate": 2.415094339622642e-06, |
|
"loss": 0.1926, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.6994396332144677, |
|
"grad_norm": 1.1878374814987183, |
|
"learning_rate": 2.4113207547169814e-06, |
|
"loss": 0.2017, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.7014773306164035, |
|
"grad_norm": 1.027300238609314, |
|
"learning_rate": 2.407547169811321e-06, |
|
"loss": 0.1725, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.7035150280183393, |
|
"grad_norm": 1.0987987518310547, |
|
"learning_rate": 2.403773584905661e-06, |
|
"loss": 0.1793, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.7055527254202751, |
|
"grad_norm": 1.08310067653656, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 0.1715, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.7075904228222107, |
|
"grad_norm": 1.255993366241455, |
|
"learning_rate": 2.39622641509434e-06, |
|
"loss": 0.1769, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.7096281202241466, |
|
"grad_norm": 1.1966819763183594, |
|
"learning_rate": 2.3924528301886797e-06, |
|
"loss": 0.1661, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.7116658176260824, |
|
"grad_norm": 1.22041916847229, |
|
"learning_rate": 2.388679245283019e-06, |
|
"loss": 0.172, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.7137035150280182, |
|
"grad_norm": 1.0473703145980835, |
|
"learning_rate": 2.3849056603773587e-06, |
|
"loss": 0.1555, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.715741212429954, |
|
"grad_norm": 1.0921486616134644, |
|
"learning_rate": 2.381132075471698e-06, |
|
"loss": 0.1739, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.7177789098318899, |
|
"grad_norm": 1.1403447389602661, |
|
"learning_rate": 2.377358490566038e-06, |
|
"loss": 0.1807, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.7198166072338257, |
|
"grad_norm": 1.1131690740585327, |
|
"learning_rate": 2.3735849056603776e-06, |
|
"loss": 0.1874, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.7218543046357615, |
|
"grad_norm": 1.1460295915603638, |
|
"learning_rate": 2.369811320754717e-06, |
|
"loss": 0.1709, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.7238920020376973, |
|
"grad_norm": 1.1869096755981445, |
|
"learning_rate": 2.366037735849057e-06, |
|
"loss": 0.1883, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.7259296994396331, |
|
"grad_norm": 1.1736819744110107, |
|
"learning_rate": 2.3622641509433965e-06, |
|
"loss": 0.1754, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.727967396841569, |
|
"grad_norm": 1.213629126548767, |
|
"learning_rate": 2.358490566037736e-06, |
|
"loss": 0.1677, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.7300050942435048, |
|
"grad_norm": 1.0772464275360107, |
|
"learning_rate": 2.3547169811320755e-06, |
|
"loss": 0.1769, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.7320427916454406, |
|
"grad_norm": 1.1553244590759277, |
|
"learning_rate": 2.3509433962264154e-06, |
|
"loss": 0.1817, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.7340804890473764, |
|
"grad_norm": 1.0742902755737305, |
|
"learning_rate": 2.347169811320755e-06, |
|
"loss": 0.1635, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.7361181864493123, |
|
"grad_norm": 1.1944258213043213, |
|
"learning_rate": 2.3433962264150944e-06, |
|
"loss": 0.1757, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.738155883851248, |
|
"grad_norm": 1.1923333406448364, |
|
"learning_rate": 2.3396226415094343e-06, |
|
"loss": 0.1665, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.740193581253184, |
|
"grad_norm": 1.086665153503418, |
|
"learning_rate": 2.3358490566037738e-06, |
|
"loss": 0.1803, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.7422312786551197, |
|
"grad_norm": 1.0686219930648804, |
|
"learning_rate": 2.3320754716981133e-06, |
|
"loss": 0.1757, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.7442689760570556, |
|
"grad_norm": 1.6613824367523193, |
|
"learning_rate": 2.328301886792453e-06, |
|
"loss": 0.1927, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.7463066734589914, |
|
"grad_norm": 1.305106282234192, |
|
"learning_rate": 2.3245283018867927e-06, |
|
"loss": 0.1882, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.7483443708609272, |
|
"grad_norm": 1.091124176979065, |
|
"learning_rate": 2.320754716981132e-06, |
|
"loss": 0.1707, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.750382068262863, |
|
"grad_norm": 1.073729157447815, |
|
"learning_rate": 2.3169811320754717e-06, |
|
"loss": 0.1891, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.7524197656647988, |
|
"grad_norm": 1.2147339582443237, |
|
"learning_rate": 2.3132075471698116e-06, |
|
"loss": 0.1734, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.7544574630667347, |
|
"grad_norm": 1.085634708404541, |
|
"learning_rate": 2.309433962264151e-06, |
|
"loss": 0.1577, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.7564951604686705, |
|
"grad_norm": 1.220919132232666, |
|
"learning_rate": 2.3056603773584906e-06, |
|
"loss": 0.1763, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.7585328578706063, |
|
"grad_norm": 1.3067682981491089, |
|
"learning_rate": 2.3018867924528305e-06, |
|
"loss": 0.1805, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.7605705552725421, |
|
"grad_norm": 1.3163460493087769, |
|
"learning_rate": 2.29811320754717e-06, |
|
"loss": 0.1737, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.762608252674478, |
|
"grad_norm": 1.1450026035308838, |
|
"learning_rate": 2.2943396226415095e-06, |
|
"loss": 0.1727, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.7646459500764138, |
|
"grad_norm": 1.0936638116836548, |
|
"learning_rate": 2.2905660377358494e-06, |
|
"loss": 0.1772, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.7666836474783496, |
|
"grad_norm": 1.2066489458084106, |
|
"learning_rate": 2.286792452830189e-06, |
|
"loss": 0.1777, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.7687213448802854, |
|
"grad_norm": 1.2631739377975464, |
|
"learning_rate": 2.2830188679245283e-06, |
|
"loss": 0.1765, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.7707590422822213, |
|
"grad_norm": 1.1708970069885254, |
|
"learning_rate": 2.279245283018868e-06, |
|
"loss": 0.1834, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.7727967396841569, |
|
"grad_norm": 1.0745712518692017, |
|
"learning_rate": 2.2754716981132078e-06, |
|
"loss": 0.1659, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.7748344370860927, |
|
"grad_norm": 1.243639588356018, |
|
"learning_rate": 2.2716981132075477e-06, |
|
"loss": 0.1683, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.7768721344880285, |
|
"grad_norm": 1.2835688591003418, |
|
"learning_rate": 2.2679245283018867e-06, |
|
"loss": 0.1749, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.7789098318899643, |
|
"grad_norm": 1.3315813541412354, |
|
"learning_rate": 2.2641509433962266e-06, |
|
"loss": 0.187, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.7809475292919001, |
|
"grad_norm": 1.525321125984192, |
|
"learning_rate": 2.260377358490566e-06, |
|
"loss": 0.1838, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.782985226693836, |
|
"grad_norm": 1.1951662302017212, |
|
"learning_rate": 2.256603773584906e-06, |
|
"loss": 0.1902, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.7850229240957718, |
|
"grad_norm": 1.2421764135360718, |
|
"learning_rate": 2.2528301886792455e-06, |
|
"loss": 0.1825, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.7870606214977076, |
|
"grad_norm": 1.1425124406814575, |
|
"learning_rate": 2.249056603773585e-06, |
|
"loss": 0.1647, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.7890983188996434, |
|
"grad_norm": 1.6294941902160645, |
|
"learning_rate": 2.245283018867925e-06, |
|
"loss": 0.1848, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.7911360163015793, |
|
"grad_norm": 1.0840221643447876, |
|
"learning_rate": 2.241509433962264e-06, |
|
"loss": 0.178, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 1.793173713703515, |
|
"grad_norm": 1.1134402751922607, |
|
"learning_rate": 2.237735849056604e-06, |
|
"loss": 0.1563, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.7952114111054507, |
|
"grad_norm": 1.2107329368591309, |
|
"learning_rate": 2.233962264150944e-06, |
|
"loss": 0.1756, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 1.7972491085073865, |
|
"grad_norm": 1.2982094287872314, |
|
"learning_rate": 2.2301886792452833e-06, |
|
"loss": 0.1793, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 1.7992868059093223, |
|
"grad_norm": 1.2917886972427368, |
|
"learning_rate": 2.226415094339623e-06, |
|
"loss": 0.1666, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 1.8013245033112582, |
|
"grad_norm": 1.23494553565979, |
|
"learning_rate": 2.2226415094339623e-06, |
|
"loss": 0.1707, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 1.803362200713194, |
|
"grad_norm": 1.1923739910125732, |
|
"learning_rate": 2.2188679245283022e-06, |
|
"loss": 0.1767, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.8053998981151298, |
|
"grad_norm": 1.1137254238128662, |
|
"learning_rate": 2.2150943396226417e-06, |
|
"loss": 0.1795, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 1.8074375955170656, |
|
"grad_norm": 1.1190637350082397, |
|
"learning_rate": 2.2113207547169812e-06, |
|
"loss": 0.1766, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 1.8094752929190014, |
|
"grad_norm": 1.1797064542770386, |
|
"learning_rate": 2.207547169811321e-06, |
|
"loss": 0.1713, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 1.8115129903209373, |
|
"grad_norm": 1.1107820272445679, |
|
"learning_rate": 2.2037735849056606e-06, |
|
"loss": 0.1826, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 1.813550687722873, |
|
"grad_norm": 1.1796709299087524, |
|
"learning_rate": 2.2e-06, |
|
"loss": 0.1771, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.815588385124809, |
|
"grad_norm": 1.0448757410049438, |
|
"learning_rate": 2.19622641509434e-06, |
|
"loss": 0.1673, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 1.8176260825267447, |
|
"grad_norm": 1.1002962589263916, |
|
"learning_rate": 2.1924528301886795e-06, |
|
"loss": 0.1612, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 1.8196637799286806, |
|
"grad_norm": 1.2181810140609741, |
|
"learning_rate": 2.188679245283019e-06, |
|
"loss": 0.1752, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 1.8217014773306164, |
|
"grad_norm": 1.2177342176437378, |
|
"learning_rate": 2.1849056603773585e-06, |
|
"loss": 0.1713, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 1.8237391747325522, |
|
"grad_norm": 1.070660948753357, |
|
"learning_rate": 2.1811320754716984e-06, |
|
"loss": 0.1707, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.825776872134488, |
|
"grad_norm": 1.083571434020996, |
|
"learning_rate": 2.177358490566038e-06, |
|
"loss": 0.1743, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 1.8278145695364238, |
|
"grad_norm": 1.2324374914169312, |
|
"learning_rate": 2.1735849056603774e-06, |
|
"loss": 0.1876, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 1.8298522669383597, |
|
"grad_norm": 1.1662664413452148, |
|
"learning_rate": 2.1698113207547173e-06, |
|
"loss": 0.172, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 1.8318899643402955, |
|
"grad_norm": 1.0966416597366333, |
|
"learning_rate": 2.166037735849057e-06, |
|
"loss": 0.1784, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 1.8339276617422313, |
|
"grad_norm": 1.0962932109832764, |
|
"learning_rate": 2.1622641509433963e-06, |
|
"loss": 0.1791, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.8359653591441671, |
|
"grad_norm": 1.1369909048080444, |
|
"learning_rate": 2.158490566037736e-06, |
|
"loss": 0.1797, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 1.838003056546103, |
|
"grad_norm": 1.1816999912261963, |
|
"learning_rate": 2.1547169811320757e-06, |
|
"loss": 0.169, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 1.8400407539480388, |
|
"grad_norm": 1.1556625366210938, |
|
"learning_rate": 2.150943396226415e-06, |
|
"loss": 0.17, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 1.8420784513499746, |
|
"grad_norm": 1.0892881155014038, |
|
"learning_rate": 2.1471698113207547e-06, |
|
"loss": 0.1652, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 1.8441161487519104, |
|
"grad_norm": 1.3906255960464478, |
|
"learning_rate": 2.1433962264150946e-06, |
|
"loss": 0.1839, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.8461538461538463, |
|
"grad_norm": 1.0891425609588623, |
|
"learning_rate": 2.139622641509434e-06, |
|
"loss": 0.1773, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 1.848191543555782, |
|
"grad_norm": 1.1463353633880615, |
|
"learning_rate": 2.1358490566037736e-06, |
|
"loss": 0.1808, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 1.850229240957718, |
|
"grad_norm": 1.086715579032898, |
|
"learning_rate": 2.1320754716981135e-06, |
|
"loss": 0.1804, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 1.8522669383596537, |
|
"grad_norm": 1.102216124534607, |
|
"learning_rate": 2.128301886792453e-06, |
|
"loss": 0.1729, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 1.8543046357615895, |
|
"grad_norm": 1.2313193082809448, |
|
"learning_rate": 2.1245283018867925e-06, |
|
"loss": 0.1906, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.8563423331635254, |
|
"grad_norm": 1.3457517623901367, |
|
"learning_rate": 2.120754716981132e-06, |
|
"loss": 0.1675, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 1.8583800305654612, |
|
"grad_norm": 1.1635335683822632, |
|
"learning_rate": 2.116981132075472e-06, |
|
"loss": 0.1775, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 1.8604177279673968, |
|
"grad_norm": 1.2560811042785645, |
|
"learning_rate": 2.1132075471698114e-06, |
|
"loss": 0.1694, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 1.8624554253693326, |
|
"grad_norm": 1.1669859886169434, |
|
"learning_rate": 2.109433962264151e-06, |
|
"loss": 0.1724, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 1.8644931227712684, |
|
"grad_norm": 1.1948050260543823, |
|
"learning_rate": 2.1056603773584908e-06, |
|
"loss": 0.1858, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.8665308201732043, |
|
"grad_norm": 1.201643705368042, |
|
"learning_rate": 2.1018867924528303e-06, |
|
"loss": 0.1636, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 1.86856851757514, |
|
"grad_norm": 1.0382106304168701, |
|
"learning_rate": 2.0981132075471698e-06, |
|
"loss": 0.1664, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 1.870606214977076, |
|
"grad_norm": 1.1447466611862183, |
|
"learning_rate": 2.0943396226415097e-06, |
|
"loss": 0.1805, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 1.8726439123790117, |
|
"grad_norm": 1.0567753314971924, |
|
"learning_rate": 2.090566037735849e-06, |
|
"loss": 0.1847, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 1.8746816097809476, |
|
"grad_norm": 1.1714054346084595, |
|
"learning_rate": 2.086792452830189e-06, |
|
"loss": 0.1699, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.8767193071828834, |
|
"grad_norm": 1.112230658531189, |
|
"learning_rate": 2.083018867924528e-06, |
|
"loss": 0.1709, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 1.8787570045848192, |
|
"grad_norm": 1.248382329940796, |
|
"learning_rate": 2.079245283018868e-06, |
|
"loss": 0.1854, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 1.8807947019867548, |
|
"grad_norm": 1.0857242345809937, |
|
"learning_rate": 2.075471698113208e-06, |
|
"loss": 0.1647, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 1.8828323993886906, |
|
"grad_norm": 1.1596136093139648, |
|
"learning_rate": 2.0716981132075475e-06, |
|
"loss": 0.1884, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 1.8848700967906264, |
|
"grad_norm": 1.0483487844467163, |
|
"learning_rate": 2.067924528301887e-06, |
|
"loss": 0.1825, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.8869077941925623, |
|
"grad_norm": 1.1504698991775513, |
|
"learning_rate": 2.0641509433962264e-06, |
|
"loss": 0.1788, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 1.888945491594498, |
|
"grad_norm": 1.0937446355819702, |
|
"learning_rate": 2.0603773584905664e-06, |
|
"loss": 0.1714, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 1.890983188996434, |
|
"grad_norm": 1.2522544860839844, |
|
"learning_rate": 2.056603773584906e-06, |
|
"loss": 0.1895, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 1.8930208863983697, |
|
"grad_norm": 1.0965933799743652, |
|
"learning_rate": 2.0528301886792453e-06, |
|
"loss": 0.1735, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 1.8950585838003056, |
|
"grad_norm": 1.3030322790145874, |
|
"learning_rate": 2.0490566037735853e-06, |
|
"loss": 0.1705, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.8970962812022414, |
|
"grad_norm": 1.1427980661392212, |
|
"learning_rate": 2.0452830188679247e-06, |
|
"loss": 0.1791, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 1.8991339786041772, |
|
"grad_norm": 1.1021360158920288, |
|
"learning_rate": 2.0415094339622642e-06, |
|
"loss": 0.1626, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 1.901171676006113, |
|
"grad_norm": 1.225327968597412, |
|
"learning_rate": 2.037735849056604e-06, |
|
"loss": 0.1746, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 1.9032093734080489, |
|
"grad_norm": 1.4384862184524536, |
|
"learning_rate": 2.0339622641509436e-06, |
|
"loss": 0.1811, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 1.9052470708099847, |
|
"grad_norm": 1.1396024227142334, |
|
"learning_rate": 2.030188679245283e-06, |
|
"loss": 0.1645, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.9072847682119205, |
|
"grad_norm": 1.1487840414047241, |
|
"learning_rate": 2.0264150943396226e-06, |
|
"loss": 0.1727, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 1.9093224656138563, |
|
"grad_norm": 1.137575387954712, |
|
"learning_rate": 2.0226415094339625e-06, |
|
"loss": 0.1673, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 1.9113601630157921, |
|
"grad_norm": 1.1038920879364014, |
|
"learning_rate": 2.018867924528302e-06, |
|
"loss": 0.1758, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 1.913397860417728, |
|
"grad_norm": 1.162651777267456, |
|
"learning_rate": 2.0150943396226415e-06, |
|
"loss": 0.1658, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 1.9154355578196638, |
|
"grad_norm": 1.0977519750595093, |
|
"learning_rate": 2.0113207547169814e-06, |
|
"loss": 0.179, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.9174732552215996, |
|
"grad_norm": 1.3130261898040771, |
|
"learning_rate": 2.007547169811321e-06, |
|
"loss": 0.1701, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 1.9195109526235354, |
|
"grad_norm": 1.1742639541625977, |
|
"learning_rate": 2.0037735849056604e-06, |
|
"loss": 0.1674, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 1.9215486500254713, |
|
"grad_norm": 1.160561203956604, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 0.1765, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 1.923586347427407, |
|
"grad_norm": 1.3576925992965698, |
|
"learning_rate": 1.99622641509434e-06, |
|
"loss": 0.1718, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 1.925624044829343, |
|
"grad_norm": 1.101428747177124, |
|
"learning_rate": 1.9924528301886793e-06, |
|
"loss": 0.1843, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.9276617422312787, |
|
"grad_norm": 1.271204948425293, |
|
"learning_rate": 1.988679245283019e-06, |
|
"loss": 0.1696, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 1.9296994396332146, |
|
"grad_norm": 1.2334147691726685, |
|
"learning_rate": 1.9849056603773587e-06, |
|
"loss": 0.1713, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 1.9317371370351504, |
|
"grad_norm": 1.2214933633804321, |
|
"learning_rate": 1.981132075471698e-06, |
|
"loss": 0.1819, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 1.9337748344370862, |
|
"grad_norm": 1.1319000720977783, |
|
"learning_rate": 1.9773584905660377e-06, |
|
"loss": 0.1763, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 1.935812531839022, |
|
"grad_norm": 1.0747102499008179, |
|
"learning_rate": 1.9735849056603776e-06, |
|
"loss": 0.179, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.9378502292409578, |
|
"grad_norm": 1.1016148328781128, |
|
"learning_rate": 1.969811320754717e-06, |
|
"loss": 0.1723, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 1.9398879266428937, |
|
"grad_norm": 1.3073527812957764, |
|
"learning_rate": 1.9660377358490566e-06, |
|
"loss": 0.18, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 1.9419256240448295, |
|
"grad_norm": 1.2624202966690063, |
|
"learning_rate": 1.9622641509433965e-06, |
|
"loss": 0.1945, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 1.9439633214467653, |
|
"grad_norm": 1.3091782331466675, |
|
"learning_rate": 1.958490566037736e-06, |
|
"loss": 0.1938, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 1.946001018848701, |
|
"grad_norm": 1.136667251586914, |
|
"learning_rate": 1.9547169811320755e-06, |
|
"loss": 0.1808, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.9480387162506367, |
|
"grad_norm": 1.1663713455200195, |
|
"learning_rate": 1.950943396226415e-06, |
|
"loss": 0.1735, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 1.9500764136525726, |
|
"grad_norm": 1.2320809364318848, |
|
"learning_rate": 1.947169811320755e-06, |
|
"loss": 0.1757, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 1.9521141110545084, |
|
"grad_norm": 1.174214243888855, |
|
"learning_rate": 1.943396226415095e-06, |
|
"loss": 0.1776, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 1.9541518084564442, |
|
"grad_norm": 1.2423794269561768, |
|
"learning_rate": 1.939622641509434e-06, |
|
"loss": 0.1921, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 1.95618950585838, |
|
"grad_norm": 1.1554875373840332, |
|
"learning_rate": 1.935849056603774e-06, |
|
"loss": 0.1698, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.9582272032603159, |
|
"grad_norm": 1.1640571355819702, |
|
"learning_rate": 1.9320754716981133e-06, |
|
"loss": 0.1838, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 1.9602649006622517, |
|
"grad_norm": 1.1926047801971436, |
|
"learning_rate": 1.928301886792453e-06, |
|
"loss": 0.1775, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 1.9623025980641875, |
|
"grad_norm": 1.2760028839111328, |
|
"learning_rate": 1.9245283018867927e-06, |
|
"loss": 0.178, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 1.9643402954661233, |
|
"grad_norm": 1.1897207498550415, |
|
"learning_rate": 1.920754716981132e-06, |
|
"loss": 0.1675, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 1.9663779928680591, |
|
"grad_norm": 1.0810887813568115, |
|
"learning_rate": 1.916981132075472e-06, |
|
"loss": 0.1631, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.9684156902699947, |
|
"grad_norm": 1.1327540874481201, |
|
"learning_rate": 1.9132075471698116e-06, |
|
"loss": 0.1834, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 1.9704533876719306, |
|
"grad_norm": 1.0766308307647705, |
|
"learning_rate": 1.909433962264151e-06, |
|
"loss": 0.1756, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 1.9724910850738664, |
|
"grad_norm": 1.1231815814971924, |
|
"learning_rate": 1.9056603773584908e-06, |
|
"loss": 0.1736, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 1.9745287824758022, |
|
"grad_norm": 1.10451078414917, |
|
"learning_rate": 1.9018867924528303e-06, |
|
"loss": 0.1848, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 1.976566479877738, |
|
"grad_norm": 1.114749789237976, |
|
"learning_rate": 1.89811320754717e-06, |
|
"loss": 0.1685, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.9786041772796739, |
|
"grad_norm": 1.1218091249465942, |
|
"learning_rate": 1.8943396226415095e-06, |
|
"loss": 0.1644, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 1.9806418746816097, |
|
"grad_norm": 1.1256656646728516, |
|
"learning_rate": 1.8905660377358492e-06, |
|
"loss": 0.1686, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 1.9826795720835455, |
|
"grad_norm": 1.2012169361114502, |
|
"learning_rate": 1.8867924528301889e-06, |
|
"loss": 0.1906, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 1.9847172694854813, |
|
"grad_norm": 1.1859033107757568, |
|
"learning_rate": 1.8830188679245284e-06, |
|
"loss": 0.1818, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 1.9867549668874172, |
|
"grad_norm": 1.1662039756774902, |
|
"learning_rate": 1.879245283018868e-06, |
|
"loss": 0.1928, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.988792664289353, |
|
"grad_norm": 1.1107443571090698, |
|
"learning_rate": 1.8754716981132076e-06, |
|
"loss": 0.1701, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 1.9908303616912888, |
|
"grad_norm": 1.1272541284561157, |
|
"learning_rate": 1.8716981132075473e-06, |
|
"loss": 0.1676, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 1.9928680590932246, |
|
"grad_norm": 1.158721923828125, |
|
"learning_rate": 1.8679245283018868e-06, |
|
"loss": 0.1642, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 1.9949057564951604, |
|
"grad_norm": 1.1401432752609253, |
|
"learning_rate": 1.8641509433962265e-06, |
|
"loss": 0.1674, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 1.9969434538970963, |
|
"grad_norm": 1.1835023164749146, |
|
"learning_rate": 1.8603773584905664e-06, |
|
"loss": 0.1812, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.998981151299032, |
|
"grad_norm": 1.2545579671859741, |
|
"learning_rate": 1.8566037735849056e-06, |
|
"loss": 0.1858, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 2.001018848700968, |
|
"grad_norm": 1.0314708948135376, |
|
"learning_rate": 1.8528301886792456e-06, |
|
"loss": 0.1435, |
|
"step": 982 |
|
} |
|
  ],
  "logging_steps": 1.0,
  "max_steps": 1473,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 491,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.3189159645267624e+19,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}