|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.99576743149922, |
|
"eval_steps": 500, |
|
"global_step": 1050, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.028514145689463134, |
|
"grad_norm": 59.78074645996094, |
|
"learning_rate": 9.374999999999999e-05, |
|
"loss": 0.7842, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05702829137892627, |
|
"grad_norm": 34.404449462890625, |
|
"learning_rate": 0.00018749999999999998, |
|
"loss": 0.7352, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08554243706838939, |
|
"grad_norm": 32.21347427368164, |
|
"learning_rate": 0.00028125, |
|
"loss": 0.7214, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11405658275785253, |
|
"grad_norm": 58.45460891723633, |
|
"learning_rate": 0.00029995428872157097, |
|
"loss": 0.7276, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.14257072844731566, |
|
"grad_norm": 34.381004333496094, |
|
"learning_rate": 0.00029976863440074164, |
|
"loss": 0.7381, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.17108487413677878, |
|
"grad_norm": 23.00215721130371, |
|
"learning_rate": 0.0002994403567435186, |
|
"loss": 0.696, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.19959901982624192, |
|
"grad_norm": 28.15259552001953, |
|
"learning_rate": 0.0002989697683657906, |
|
"loss": 0.6695, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.22811316551570507, |
|
"grad_norm": 23.132097244262695, |
|
"learning_rate": 0.0002983573174046776, |
|
"loss": 0.6745, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2566273112051682, |
|
"grad_norm": 26.72660255432129, |
|
"learning_rate": 0.00029760358709177425, |
|
"loss": 0.6742, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.28514145689463133, |
|
"grad_norm": 21.0267391204834, |
|
"learning_rate": 0.00029670929519774324, |
|
"loss": 0.6797, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.3136556025840945, |
|
"grad_norm": 24.07339096069336, |
|
"learning_rate": 0.0002956752933487888, |
|
"loss": 0.6582, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.34216974827355756, |
|
"grad_norm": 32.077362060546875, |
|
"learning_rate": 0.00029450256621566076, |
|
"loss": 0.6531, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3706838939630207, |
|
"grad_norm": 23.832252502441406, |
|
"learning_rate": 0.0002931922305759614, |
|
"loss": 0.6584, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.39919803965248385, |
|
"grad_norm": 21.751239776611328, |
|
"learning_rate": 0.00029174553425064773, |
|
"loss": 0.6557, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.427712185341947, |
|
"grad_norm": 24.70648956298828, |
|
"learning_rate": 0.00029016385491574314, |
|
"loss": 0.6376, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.45622633103141014, |
|
"grad_norm": 23.676149368286133, |
|
"learning_rate": 0.00028844869879038863, |
|
"loss": 0.6424, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4847404767208732, |
|
"grad_norm": 26.275875091552734, |
|
"learning_rate": 0.0002866016992024837, |
|
"loss": 0.633, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.5132546224103364, |
|
"grad_norm": 23.403223037719727, |
|
"learning_rate": 0.0002846246150332827, |
|
"loss": 0.6203, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.5417687680997995, |
|
"grad_norm": 23.481271743774414, |
|
"learning_rate": 0.000282519329042428, |
|
"loss": 0.6211, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5702829137892627, |
|
"grad_norm": 20.71142578125, |
|
"learning_rate": 0.00028028784607501473, |
|
"loss": 0.6232, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5987970594787257, |
|
"grad_norm": 20.313270568847656, |
|
"learning_rate": 0.00027793229115239456, |
|
"loss": 0.6035, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.627311205168189, |
|
"grad_norm": 15.384033203125, |
|
"learning_rate": 0.0002754549074485369, |
|
"loss": 0.6082, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.655825350857652, |
|
"grad_norm": 20.352094650268555, |
|
"learning_rate": 0.0002728580541538743, |
|
"loss": 0.6045, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6843394965471151, |
|
"grad_norm": 20.499040603637695, |
|
"learning_rate": 0.0002701442042286665, |
|
"loss": 0.6077, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7128536422365783, |
|
"grad_norm": 19.507705688476562, |
|
"learning_rate": 0.000267315942048022, |
|
"loss": 0.6052, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.7413677879260414, |
|
"grad_norm": 23.443056106567383, |
|
"learning_rate": 0.0002643759609408212, |
|
"loss": 0.5917, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7698819336155046, |
|
"grad_norm": 22.090147018432617, |
|
"learning_rate": 0.00026132706062488294, |
|
"loss": 0.596, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7983960793049677, |
|
"grad_norm": 22.015439987182617, |
|
"learning_rate": 0.0002581721445408184, |
|
"loss": 0.5923, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.8269102249944308, |
|
"grad_norm": 19.542490005493164, |
|
"learning_rate": 0.0002549142170871103, |
|
"loss": 0.5955, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.855424370683894, |
|
"grad_norm": 17.32285499572754, |
|
"learning_rate": 0.00025155638075905097, |
|
"loss": 0.566, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8839385163733571, |
|
"grad_norm": 18.898284912109375, |
|
"learning_rate": 0.00024810183319426394, |
|
"loss": 0.5677, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9124526620628203, |
|
"grad_norm": 16.297840118408203, |
|
"learning_rate": 0.00024455386412762184, |
|
"loss": 0.577, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.9409668077522834, |
|
"grad_norm": 22.982707977294922, |
|
"learning_rate": 0.00024091585225846125, |
|
"loss": 0.57, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.9694809534417465, |
|
"grad_norm": 20.184415817260742, |
|
"learning_rate": 0.00023719126203307778, |
|
"loss": 0.5743, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9979950991312097, |
|
"grad_norm": 16.89832878112793, |
|
"learning_rate": 0.00023338364034556413, |
|
"loss": 0.5663, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.0274003118734685, |
|
"grad_norm": 16.95356559753418, |
|
"learning_rate": 0.00022949661316013482, |
|
"loss": 0.4709, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.0559144575629316, |
|
"grad_norm": 20.939350128173828, |
|
"learning_rate": 0.0002255338820581528, |
|
"loss": 0.4702, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.0844286032523947, |
|
"grad_norm": 19.86914825439453, |
|
"learning_rate": 0.0002214992207131462, |
|
"loss": 0.4728, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.112942748941858, |
|
"grad_norm": 19.765581130981445, |
|
"learning_rate": 0.0002173964712971729, |
|
"loss": 0.4664, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.141456894631321, |
|
"grad_norm": 16.14029884338379, |
|
"learning_rate": 0.00021322954082195433, |
|
"loss": 0.4696, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.1699710403207841, |
|
"grad_norm": 17.055089950561523, |
|
"learning_rate": 0.00020900239741826278, |
|
"loss": 0.4717, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.1984851860102472, |
|
"grad_norm": 14.829668045043945, |
|
"learning_rate": 0.00020471906655710603, |
|
"loss": 0.4716, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.2269993316997103, |
|
"grad_norm": 15.102470397949219, |
|
"learning_rate": 0.00020038362721630696, |
|
"loss": 0.4597, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.2555134773891736, |
|
"grad_norm": 19.483240127563477, |
|
"learning_rate": 0.00019600020799612964, |
|
"loss": 0.4582, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.2840276230786367, |
|
"grad_norm": 20.06715202331543, |
|
"learning_rate": 0.00019157298318764958, |
|
"loss": 0.4564, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.3125417687680998, |
|
"grad_norm": 16.547321319580078, |
|
"learning_rate": 0.00018710616879761405, |
|
"loss": 0.4572, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.341055914457563, |
|
"grad_norm": 15.825061798095703, |
|
"learning_rate": 0.0001826040185335761, |
|
"loss": 0.468, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.369570060147026, |
|
"grad_norm": 15.663127899169922, |
|
"learning_rate": 0.0001780708197531268, |
|
"loss": 0.4525, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.398084205836489, |
|
"grad_norm": 14.849474906921387, |
|
"learning_rate": 0.00017351088938108276, |
|
"loss": 0.4561, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.4265983515259524, |
|
"grad_norm": 15.581180572509766, |
|
"learning_rate": 0.00016892856979851725, |
|
"loss": 0.4603, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.4551124972154155, |
|
"grad_norm": 14.99488353729248, |
|
"learning_rate": 0.00016432822470754922, |
|
"loss": 0.446, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.4836266429048786, |
|
"grad_norm": 16.289323806762695, |
|
"learning_rate": 0.00015971423497582873, |
|
"loss": 0.4534, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.5121407885943419, |
|
"grad_norm": 15.169504165649414, |
|
"learning_rate": 0.00015509099446467557, |
|
"loss": 0.4502, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.540654934283805, |
|
"grad_norm": 13.88201904296875, |
|
"learning_rate": 0.00015046290584484455, |
|
"loss": 0.4563, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.569169079973268, |
|
"grad_norm": 14.23528003692627, |
|
"learning_rate": 0.00014583437640390112, |
|
"loss": 0.4303, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.5976832256627311, |
|
"grad_norm": 13.917679786682129, |
|
"learning_rate": 0.00014120981384920065, |
|
"loss": 0.4396, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.6261973713521942, |
|
"grad_norm": 15.415020942687988, |
|
"learning_rate": 0.0001365936221104682, |
|
"loss": 0.4486, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.6547115170416573, |
|
"grad_norm": 13.339295387268066, |
|
"learning_rate": 0.00013199019714597526, |
|
"loss": 0.4303, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.6832256627311204, |
|
"grad_norm": 15.275891304016113, |
|
"learning_rate": 0.00012740392275630802, |
|
"loss": 0.4434, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.7117398084205835, |
|
"grad_norm": 15.294014930725098, |
|
"learning_rate": 0.00012283916640971304, |
|
"loss": 0.4352, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.7402539541100468, |
|
"grad_norm": 13.472885131835938, |
|
"learning_rate": 0.00011830027508299607, |
|
"loss": 0.4311, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.7687680997995099, |
|
"grad_norm": 13.640401840209961, |
|
"learning_rate": 0.00011379157112193487, |
|
"loss": 0.4192, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.7972822454889732, |
|
"grad_norm": 14.599030494689941, |
|
"learning_rate": 0.00010931734812514786, |
|
"loss": 0.4349, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.8257963911784363, |
|
"grad_norm": 13.727120399475098, |
|
"learning_rate": 0.00010488186685533828, |
|
"loss": 0.4348, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.8543105368678994, |
|
"grad_norm": 14.04608154296875, |
|
"learning_rate": 0.00010048935118180787, |
|
"loss": 0.428, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.8828246825573625, |
|
"grad_norm": 14.668761253356934, |
|
"learning_rate": 9.614398405810378e-05, |
|
"loss": 0.4271, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.9113388282468255, |
|
"grad_norm": 15.308387756347656, |
|
"learning_rate": 9.18499035386292e-05, |
|
"loss": 0.4153, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.9398529739362886, |
|
"grad_norm": 14.18338680267334, |
|
"learning_rate": 8.761119883801097e-05, |
|
"loss": 0.4237, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.9683671196257517, |
|
"grad_norm": 15.16002368927002, |
|
"learning_rate": 8.343190643697685e-05, |
|
"loss": 0.4097, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.9968812653152148, |
|
"grad_norm": 12.727019309997559, |
|
"learning_rate": 7.931600623845105e-05, |
|
"loss": 0.4035, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.026286478057474, |
|
"grad_norm": 16.409337997436523, |
|
"learning_rate": 7.526741777752797e-05, |
|
"loss": 0.2883, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.054800623746937, |
|
"grad_norm": 14.678768157958984, |
|
"learning_rate": 7.128999648893393e-05, |
|
"loss": 0.2779, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.0833147694364, |
|
"grad_norm": 15.050424575805664, |
|
"learning_rate": 6.738753003553106e-05, |
|
"loss": 0.2766, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.111828915125863, |
|
"grad_norm": 13.391814231872559, |
|
"learning_rate": 6.356373470135943e-05, |
|
"loss": 0.2557, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.140343060815326, |
|
"grad_norm": 12.664278984069824, |
|
"learning_rate": 5.982225185265335e-05, |
|
"loss": 0.2649, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.1688572065047893, |
|
"grad_norm": 12.713603019714355, |
|
"learning_rate": 5.61666444702003e-05, |
|
"loss": 0.2607, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.197371352194253, |
|
"grad_norm": 14.50146770477295, |
|
"learning_rate": 5.260039375634626e-05, |
|
"loss": 0.2569, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.225885497883716, |
|
"grad_norm": 14.117573738098145, |
|
"learning_rate": 4.91268958198777e-05, |
|
"loss": 0.2528, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.254399643573179, |
|
"grad_norm": 13.352962493896484, |
|
"learning_rate": 4.5749458441937426e-05, |
|
"loss": 0.2594, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.282913789262642, |
|
"grad_norm": 14.24978256225586, |
|
"learning_rate": 4.24712979260541e-05, |
|
"loss": 0.246, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.311427934952105, |
|
"grad_norm": 14.509572982788086, |
|
"learning_rate": 3.9295536035284975e-05, |
|
"loss": 0.2456, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.3399420806415683, |
|
"grad_norm": 13.541816711425781, |
|
"learning_rate": 3.622519701938879e-05, |
|
"loss": 0.2596, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.3684562263310314, |
|
"grad_norm": 13.303231239318848, |
|
"learning_rate": 3.326320473485965e-05, |
|
"loss": 0.2478, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.3969703720204945, |
|
"grad_norm": 13.914246559143066, |
|
"learning_rate": 3.0412379860564546e-05, |
|
"loss": 0.2471, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.4254845177099575, |
|
"grad_norm": 13.636366844177246, |
|
"learning_rate": 2.7675437211635994e-05, |
|
"loss": 0.244, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.4539986633994206, |
|
"grad_norm": 13.738758087158203, |
|
"learning_rate": 2.505498315417775e-05, |
|
"loss": 0.25, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.4825128090888837, |
|
"grad_norm": 12.734477043151855, |
|
"learning_rate": 2.2553513123245593e-05, |
|
"loss": 0.2514, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.5110269547783473, |
|
"grad_norm": 13.232802391052246, |
|
"learning_rate": 2.017340924646676e-05, |
|
"loss": 0.2517, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.5395411004678103, |
|
"grad_norm": 12.584091186523438, |
|
"learning_rate": 1.791693807556106e-05, |
|
"loss": 0.2423, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.5680552461572734, |
|
"grad_norm": 13.133004188537598, |
|
"learning_rate": 1.5786248427923765e-05, |
|
"loss": 0.2397, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.5965693918467365, |
|
"grad_norm": 12.750874519348145, |
|
"learning_rate": 1.3783369340326038e-05, |
|
"loss": 0.2402, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.6250835375361996, |
|
"grad_norm": 14.99782657623291, |
|
"learning_rate": 1.191020813668126e-05, |
|
"loss": 0.2325, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.6535976832256627, |
|
"grad_norm": 13.00296401977539, |
|
"learning_rate": 1.0168548611717453e-05, |
|
"loss": 0.2447, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.682111828915126, |
|
"grad_norm": 12.952726364135742, |
|
"learning_rate": 8.560049332285445e-06, |
|
"loss": 0.2324, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.710625974604589, |
|
"grad_norm": 12.093839645385742, |
|
"learning_rate": 7.086242057920466e-06, |
|
"loss": 0.2374, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.739140120294052, |
|
"grad_norm": 13.643256187438965, |
|
"learning_rate": 5.748530282161151e-06, |
|
"loss": 0.2375, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.7676542659835155, |
|
"grad_norm": 12.18138599395752, |
|
"learning_rate": 4.548187896015132e-06, |
|
"loss": 0.2398, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.796168411672978, |
|
"grad_norm": 13.081153869628906, |
|
"learning_rate": 3.4863579748440395e-06, |
|
"loss": 0.2414, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.8246825573624417, |
|
"grad_norm": 12.803534507751465, |
|
"learning_rate": 2.5640516898229824e-06, |
|
"loss": 0.2353, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.8531967030519048, |
|
"grad_norm": 14.616987228393555, |
|
"learning_rate": 1.7821473450112257e-06, |
|
"loss": 0.2387, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.881710848741368, |
|
"grad_norm": 13.588176727294922, |
|
"learning_rate": 1.1413895409510932e-06, |
|
"loss": 0.2393, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.910224994430831, |
|
"grad_norm": 13.362626075744629, |
|
"learning_rate": 6.423884655915035e-07, |
|
"loss": 0.2452, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.938739140120294, |
|
"grad_norm": 13.089447021484375, |
|
"learning_rate": 2.85619313211366e-07, |
|
"loss": 0.2442, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.967253285809757, |
|
"grad_norm": 14.710633277893066, |
|
"learning_rate": 7.142183189641215e-08, |
|
"loss": 0.2395, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.99576743149922, |
|
"grad_norm": 13.993697166442871, |
|
"learning_rate": 0.0, |
|
"loss": 0.2403, |
|
"step": 1050 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1050, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2082385668508221e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|