{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8228759514503189, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016457519029006377, "grad_norm": 1.7265625, "learning_rate": 4e-05, "loss": 3.4409, "step": 20 }, { "epoch": 0.032915038058012755, "grad_norm": 1.125, "learning_rate": 8e-05, "loss": 2.834, "step": 40 }, { "epoch": 0.04937255708701913, "grad_norm": 2.203125, "learning_rate": 0.00012, "loss": 2.0422, "step": 60 }, { "epoch": 0.06583007611602551, "grad_norm": 1.734375, "learning_rate": 0.00016, "loss": 1.6617, "step": 80 }, { "epoch": 0.08228759514503188, "grad_norm": 1.4765625, "learning_rate": 0.0002, "loss": 1.5035, "step": 100 }, { "epoch": 0.09874511417403826, "grad_norm": 1.6328125, "learning_rate": 0.0001988716502115656, "loss": 1.4449, "step": 120 }, { "epoch": 0.11520263320304464, "grad_norm": 2.015625, "learning_rate": 0.00019774330042313118, "loss": 1.3653, "step": 140 }, { "epoch": 0.13166015223205102, "grad_norm": 1.640625, "learning_rate": 0.00019661495063469676, "loss": 1.3634, "step": 160 }, { "epoch": 0.1481176712610574, "grad_norm": 1.2734375, "learning_rate": 0.00019548660084626237, "loss": 1.3116, "step": 180 }, { "epoch": 0.16457519029006376, "grad_norm": 1.1796875, "learning_rate": 0.00019435825105782795, "loss": 1.2961, "step": 200 }, { "epoch": 0.18103270931907015, "grad_norm": 1.359375, "learning_rate": 0.00019322990126939354, "loss": 1.2868, "step": 220 }, { "epoch": 0.19749022834807653, "grad_norm": 1.2421875, "learning_rate": 0.0001921015514809591, "loss": 1.2902, "step": 240 }, { "epoch": 0.21394774737708291, "grad_norm": 1.5859375, "learning_rate": 0.00019097320169252468, "loss": 1.209, "step": 260 }, { "epoch": 0.23040526640608927, "grad_norm": 1.5859375, "learning_rate": 0.00018984485190409026, "loss": 1.2912, "step": 280 }, { "epoch": 0.24686278543509566, "grad_norm": 1.4296875, "learning_rate": 0.00018871650211565587, "loss": 1.2733, "step": 300 }, { "epoch": 0.26332030446410204, "grad_norm": 1.4140625, "learning_rate": 0.00018758815232722145, "loss": 1.1895, "step": 320 }, { "epoch": 0.2797778234931084, "grad_norm": 1.25, "learning_rate": 0.00018645980253878704, "loss": 1.2259, "step": 340 }, { "epoch": 0.2962353425221148, "grad_norm": 1.4921875, "learning_rate": 0.00018533145275035262, "loss": 1.2636, "step": 360 }, { "epoch": 0.3126928615511212, "grad_norm": 1.796875, "learning_rate": 0.0001842031029619182, "loss": 1.2748, "step": 380 }, { "epoch": 0.3291503805801275, "grad_norm": 1.4296875, "learning_rate": 0.0001830747531734838, "loss": 1.1335, "step": 400 }, { "epoch": 0.3456078996091339, "grad_norm": 1.6640625, "learning_rate": 0.00018194640338504937, "loss": 1.1985, "step": 420 }, { "epoch": 0.3620654186381403, "grad_norm": 1.5078125, "learning_rate": 0.00018081805359661496, "loss": 1.2015, "step": 440 }, { "epoch": 0.3785229376671467, "grad_norm": 1.53125, "learning_rate": 0.00017968970380818057, "loss": 1.1548, "step": 460 }, { "epoch": 0.39498045669615306, "grad_norm": 1.359375, "learning_rate": 0.00017856135401974612, "loss": 1.1249, "step": 480 }, { "epoch": 0.41143797572515944, "grad_norm": 1.7109375, "learning_rate": 0.0001774330042313117, "loss": 1.1734, "step": 500 }, { "epoch": 0.42789549475416583, "grad_norm": 1.71875, "learning_rate": 0.0001763046544428773, "loss": 1.1772, "step": 520 }, { "epoch": 0.4443530137831722, "grad_norm": 1.2578125, "learning_rate": 0.00017517630465444287, "loss": 
1.1092, "step": 540 }, { "epoch": 0.46081053281217854, "grad_norm": 1.234375, "learning_rate": 0.00017404795486600846, "loss": 1.1306, "step": 560 }, { "epoch": 0.4772680518411849, "grad_norm": 1.5234375, "learning_rate": 0.00017291960507757407, "loss": 1.1457, "step": 580 }, { "epoch": 0.4937255708701913, "grad_norm": 1.4375, "learning_rate": 0.00017179125528913965, "loss": 1.1501, "step": 600 }, { "epoch": 0.5101830898991977, "grad_norm": 1.1796875, "learning_rate": 0.00017066290550070523, "loss": 1.1539, "step": 620 }, { "epoch": 0.5266406089282041, "grad_norm": 1.3984375, "learning_rate": 0.00016953455571227082, "loss": 1.1242, "step": 640 }, { "epoch": 0.5430981279572105, "grad_norm": 1.6015625, "learning_rate": 0.0001684062059238364, "loss": 1.1551, "step": 660 }, { "epoch": 0.5595556469862168, "grad_norm": 1.0625, "learning_rate": 0.00016727785613540198, "loss": 1.1252, "step": 680 }, { "epoch": 0.5760131660152232, "grad_norm": 1.0078125, "learning_rate": 0.00016614950634696757, "loss": 1.1023, "step": 700 }, { "epoch": 0.5924706850442296, "grad_norm": 1.28125, "learning_rate": 0.00016502115655853315, "loss": 1.1136, "step": 720 }, { "epoch": 0.608928204073236, "grad_norm": 1.34375, "learning_rate": 0.00016389280677009873, "loss": 1.1363, "step": 740 }, { "epoch": 0.6253857231022424, "grad_norm": 1.3671875, "learning_rate": 0.00016276445698166432, "loss": 1.0763, "step": 760 }, { "epoch": 0.6418432421312487, "grad_norm": 1.34375, "learning_rate": 0.0001616361071932299, "loss": 1.1558, "step": 780 }, { "epoch": 0.658300761160255, "grad_norm": 1.140625, "learning_rate": 0.00016050775740479548, "loss": 1.0374, "step": 800 }, { "epoch": 0.6747582801892614, "grad_norm": 1.40625, "learning_rate": 0.00015937940761636107, "loss": 1.1492, "step": 820 }, { "epoch": 0.6912157992182678, "grad_norm": 1.5234375, "learning_rate": 0.00015825105782792665, "loss": 1.1261, "step": 840 }, { "epoch": 0.7076733182472742, "grad_norm": 1.46875, "learning_rate": 0.00015712270803949226, "loss": 1.1236, "step": 860 }, { "epoch": 0.7241308372762806, "grad_norm": 1.5, "learning_rate": 0.00015599435825105785, "loss": 1.0795, "step": 880 }, { "epoch": 0.740588356305287, "grad_norm": 1.28125, "learning_rate": 0.00015486600846262343, "loss": 1.0866, "step": 900 }, { "epoch": 0.7570458753342934, "grad_norm": 1.640625, "learning_rate": 0.000153737658674189, "loss": 1.0635, "step": 920 }, { "epoch": 0.7735033943632997, "grad_norm": 1.453125, "learning_rate": 0.0001526093088857546, "loss": 1.1106, "step": 940 }, { "epoch": 0.7899609133923061, "grad_norm": 1.0390625, "learning_rate": 0.00015148095909732018, "loss": 1.1241, "step": 960 }, { "epoch": 0.8064184324213125, "grad_norm": 1.296875, "learning_rate": 0.00015035260930888576, "loss": 1.078, "step": 980 }, { "epoch": 0.8228759514503189, "grad_norm": 1.109375, "learning_rate": 0.00014922425952045135, "loss": 1.1545, "step": 1000 } ], "logging_steps": 20, "max_steps": 3645, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.331493124343808e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }