|
{ |
|
"best_metric": 0.6402289867401123, |
|
"best_model_checkpoint": "nllb_200_distilled_1.3B_ENtoFO_bsz_64_epochs_10lr0.0001/checkpoint-7500", |
|
"epoch": 2.635885382917266, |
|
"eval_steps": 500, |
|
"global_step": 7500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.035145138438896883, |
|
"grad_norm": 0.7466627955436707, |
|
"learning_rate": 2e-05, |
|
"loss": 1.2984, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07029027687779377, |
|
"grad_norm": 0.8146196603775024, |
|
"learning_rate": 4e-05, |
|
"loss": 0.9825, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10543541531669065, |
|
"grad_norm": 0.8888100981712341, |
|
"learning_rate": 6e-05, |
|
"loss": 0.9552, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.14058055375558753, |
|
"grad_norm": 0.7786069512367249, |
|
"learning_rate": 8e-05, |
|
"loss": 0.9249, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.17572569219448442, |
|
"grad_norm": 0.7248039245605469, |
|
"learning_rate": 0.0001, |
|
"loss": 0.9121, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.17572569219448442, |
|
"eval_bleu": 39.6289, |
|
"eval_chrf++": 58.7253, |
|
"eval_gen_len": 17.6964, |
|
"eval_loss": 0.8153083920478821, |
|
"eval_runtime": 3428.0218, |
|
"eval_samples_per_second": 2.136, |
|
"eval_steps_per_second": 1.068, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2108708306333813, |
|
"grad_norm": 0.6579689979553223, |
|
"learning_rate": 9.964221824686941e-05, |
|
"loss": 0.9033, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.24601596907227818, |
|
"grad_norm": 0.7566829919815063, |
|
"learning_rate": 9.928443649373882e-05, |
|
"loss": 0.89, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.28116110751117507, |
|
"grad_norm": 0.750449001789093, |
|
"learning_rate": 9.892665474060824e-05, |
|
"loss": 0.8709, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.31630624595007195, |
|
"grad_norm": 0.6458595395088196, |
|
"learning_rate": 9.856887298747764e-05, |
|
"loss": 0.855, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.35145138438896883, |
|
"grad_norm": 0.6504934430122375, |
|
"learning_rate": 9.821109123434705e-05, |
|
"loss": 0.8558, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.35145138438896883, |
|
"eval_bleu": 41.3868, |
|
"eval_chrf++": 60.2285, |
|
"eval_gen_len": 17.8309, |
|
"eval_loss": 0.7573392987251282, |
|
"eval_runtime": 3438.4799, |
|
"eval_samples_per_second": 2.129, |
|
"eval_steps_per_second": 1.065, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.3865965228278657, |
|
"grad_norm": 0.7932488918304443, |
|
"learning_rate": 9.785330948121646e-05, |
|
"loss": 0.8455, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.4217416612667626, |
|
"grad_norm": 0.7324668765068054, |
|
"learning_rate": 9.749552772808587e-05, |
|
"loss": 0.8363, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.4568867997056595, |
|
"grad_norm": 0.7606379985809326, |
|
"learning_rate": 9.713774597495528e-05, |
|
"loss": 0.816, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.49203193814455637, |
|
"grad_norm": 0.5376294851303101, |
|
"learning_rate": 9.677996422182469e-05, |
|
"loss": 0.8058, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.5271770765834533, |
|
"grad_norm": 0.903544008731842, |
|
"learning_rate": 9.64221824686941e-05, |
|
"loss": 0.8106, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5271770765834533, |
|
"eval_bleu": 41.8145, |
|
"eval_chrf++": 60.6355, |
|
"eval_gen_len": 17.7981, |
|
"eval_loss": 0.7231032252311707, |
|
"eval_runtime": 3422.1307, |
|
"eval_samples_per_second": 2.139, |
|
"eval_steps_per_second": 1.07, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.5623222150223501, |
|
"grad_norm": 0.6048879027366638, |
|
"learning_rate": 9.606440071556351e-05, |
|
"loss": 0.7833, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.597467353461247, |
|
"grad_norm": 0.6456710696220398, |
|
"learning_rate": 9.570661896243292e-05, |
|
"loss": 0.7913, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.6326124919001439, |
|
"grad_norm": 0.6244344711303711, |
|
"learning_rate": 9.534883720930233e-05, |
|
"loss": 0.7991, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.6677576303390408, |
|
"grad_norm": 0.6789991855621338, |
|
"learning_rate": 9.499105545617174e-05, |
|
"loss": 0.7764, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.7029027687779377, |
|
"grad_norm": 0.7175530195236206, |
|
"learning_rate": 9.463327370304115e-05, |
|
"loss": 0.782, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7029027687779377, |
|
"eval_bleu": 42.7477, |
|
"eval_chrf++": 61.3889, |
|
"eval_gen_len": 17.7144, |
|
"eval_loss": 0.6981974244117737, |
|
"eval_runtime": 3442.4579, |
|
"eval_samples_per_second": 2.127, |
|
"eval_steps_per_second": 1.063, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.7380479072168346, |
|
"grad_norm": 0.5626235604286194, |
|
"learning_rate": 9.427549194991055e-05, |
|
"loss": 0.7717, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.7731930456557314, |
|
"grad_norm": 0.6301568746566772, |
|
"learning_rate": 9.391771019677997e-05, |
|
"loss": 0.7697, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.8083381840946283, |
|
"grad_norm": 0.5354183316230774, |
|
"learning_rate": 9.355992844364938e-05, |
|
"loss": 0.7659, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.8434833225335252, |
|
"grad_norm": 0.6053968071937561, |
|
"learning_rate": 9.320214669051879e-05, |
|
"loss": 0.7588, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.8786284609724221, |
|
"grad_norm": 0.593941330909729, |
|
"learning_rate": 9.284436493738819e-05, |
|
"loss": 0.7488, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.8786284609724221, |
|
"eval_bleu": 43.5398, |
|
"eval_chrf++": 61.9091, |
|
"eval_gen_len": 17.8354, |
|
"eval_loss": 0.6773364543914795, |
|
"eval_runtime": 3438.2409, |
|
"eval_samples_per_second": 2.129, |
|
"eval_steps_per_second": 1.065, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.913773599411319, |
|
"grad_norm": 0.5975068211555481, |
|
"learning_rate": 9.248658318425761e-05, |
|
"loss": 0.7571, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.9489187378502159, |
|
"grad_norm": 0.5520368218421936, |
|
"learning_rate": 9.212880143112702e-05, |
|
"loss": 0.7398, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.9840638762891127, |
|
"grad_norm": 0.5748527646064758, |
|
"learning_rate": 9.177101967799643e-05, |
|
"loss": 0.7455, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.0192090147280095, |
|
"grad_norm": 0.6073290705680847, |
|
"learning_rate": 9.141323792486583e-05, |
|
"loss": 0.6714, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 1.0543541531669065, |
|
"grad_norm": 0.6117560863494873, |
|
"learning_rate": 9.105545617173525e-05, |
|
"loss": 0.6163, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0543541531669065, |
|
"eval_bleu": 43.8509, |
|
"eval_chrf++": 62.1772, |
|
"eval_gen_len": 17.8905, |
|
"eval_loss": 0.6679165959358215, |
|
"eval_runtime": 3455.7087, |
|
"eval_samples_per_second": 2.119, |
|
"eval_steps_per_second": 1.059, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.0894992916058033, |
|
"grad_norm": 0.5141863822937012, |
|
"learning_rate": 9.069767441860465e-05, |
|
"loss": 0.6151, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 1.1246444300447003, |
|
"grad_norm": 0.5742676258087158, |
|
"learning_rate": 9.033989266547407e-05, |
|
"loss": 0.6236, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 1.159789568483597, |
|
"grad_norm": 0.5287165641784668, |
|
"learning_rate": 8.998211091234347e-05, |
|
"loss": 0.6249, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 1.194934706922494, |
|
"grad_norm": 0.5631851553916931, |
|
"learning_rate": 8.962432915921289e-05, |
|
"loss": 0.6229, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 1.2300798453613908, |
|
"grad_norm": 0.5420289039611816, |
|
"learning_rate": 8.926654740608229e-05, |
|
"loss": 0.6103, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.2300798453613908, |
|
"eval_bleu": 44.0794, |
|
"eval_chrf++": 62.3925, |
|
"eval_gen_len": 17.7716, |
|
"eval_loss": 0.6606557369232178, |
|
"eval_runtime": 3446.6004, |
|
"eval_samples_per_second": 2.124, |
|
"eval_steps_per_second": 1.062, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.2652249838002878, |
|
"grad_norm": 0.576799750328064, |
|
"learning_rate": 8.890876565295171e-05, |
|
"loss": 0.6076, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 1.3003701222391846, |
|
"grad_norm": 0.4976769983768463, |
|
"learning_rate": 8.855098389982111e-05, |
|
"loss": 0.6084, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 1.3355152606780816, |
|
"grad_norm": 0.5788149237632751, |
|
"learning_rate": 8.819320214669053e-05, |
|
"loss": 0.6121, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 1.3706603991169783, |
|
"grad_norm": 0.5019386410713196, |
|
"learning_rate": 8.783542039355993e-05, |
|
"loss": 0.6172, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 1.4058055375558753, |
|
"grad_norm": 0.5911116600036621, |
|
"learning_rate": 8.747763864042933e-05, |
|
"loss": 0.6037, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.4058055375558753, |
|
"eval_bleu": 44.0555, |
|
"eval_chrf++": 62.3227, |
|
"eval_gen_len": 17.7384, |
|
"eval_loss": 0.6558951139450073, |
|
"eval_runtime": 3448.9332, |
|
"eval_samples_per_second": 2.123, |
|
"eval_steps_per_second": 1.061, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.4409506759947721, |
|
"grad_norm": 0.5771309733390808, |
|
"learning_rate": 8.711985688729875e-05, |
|
"loss": 0.6067, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 1.476095814433669, |
|
"grad_norm": 0.6425995230674744, |
|
"learning_rate": 8.676207513416817e-05, |
|
"loss": 0.6097, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 1.5112409528725659, |
|
"grad_norm": 0.5513516664505005, |
|
"learning_rate": 8.640429338103757e-05, |
|
"loss": 0.6084, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 1.5463860913114629, |
|
"grad_norm": 0.5649448037147522, |
|
"learning_rate": 8.604651162790697e-05, |
|
"loss": 0.6026, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.5815312297503596, |
|
"grad_norm": 0.6140225529670715, |
|
"learning_rate": 8.568872987477639e-05, |
|
"loss": 0.6074, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.5815312297503596, |
|
"eval_bleu": 43.6701, |
|
"eval_chrf++": 62.1631, |
|
"eval_gen_len": 17.6893, |
|
"eval_loss": 0.6474220156669617, |
|
"eval_runtime": 3434.705, |
|
"eval_samples_per_second": 2.131, |
|
"eval_steps_per_second": 1.066, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.6166763681892564, |
|
"grad_norm": 0.5787105560302734, |
|
"learning_rate": 8.533094812164581e-05, |
|
"loss": 0.6167, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.6518215066281534, |
|
"grad_norm": 0.5308918356895447, |
|
"learning_rate": 8.497316636851521e-05, |
|
"loss": 0.5936, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.6869666450670504, |
|
"grad_norm": 0.5810381770133972, |
|
"learning_rate": 8.461538461538461e-05, |
|
"loss": 0.6074, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.7221117835059472, |
|
"grad_norm": 0.6304420232772827, |
|
"learning_rate": 8.425760286225403e-05, |
|
"loss": 0.5995, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.757256921944844, |
|
"grad_norm": 0.6314705610275269, |
|
"learning_rate": 8.389982110912343e-05, |
|
"loss": 0.6064, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.757256921944844, |
|
"eval_bleu": 44.2084, |
|
"eval_chrf++": 62.587, |
|
"eval_gen_len": 17.7677, |
|
"eval_loss": 0.6390019059181213, |
|
"eval_runtime": 3462.9771, |
|
"eval_samples_per_second": 2.114, |
|
"eval_steps_per_second": 1.057, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.792402060383741, |
|
"grad_norm": 0.5468895435333252, |
|
"learning_rate": 8.354203935599285e-05, |
|
"loss": 0.6014, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.827547198822638, |
|
"grad_norm": 0.564479649066925, |
|
"learning_rate": 8.318425760286225e-05, |
|
"loss": 0.5984, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.8626923372615347, |
|
"grad_norm": 0.6068270206451416, |
|
"learning_rate": 8.282647584973167e-05, |
|
"loss": 0.5995, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.8978374757004315, |
|
"grad_norm": 0.5385338068008423, |
|
"learning_rate": 8.246869409660107e-05, |
|
"loss": 0.6023, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.9329826141393285, |
|
"grad_norm": 0.5280515551567078, |
|
"learning_rate": 8.211091234347049e-05, |
|
"loss": 0.5969, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.9329826141393285, |
|
"eval_bleu": 44.5351, |
|
"eval_chrf++": 62.758, |
|
"eval_gen_len": 17.7998, |
|
"eval_loss": 0.6315435767173767, |
|
"eval_runtime": 3432.7408, |
|
"eval_samples_per_second": 2.133, |
|
"eval_steps_per_second": 1.066, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.9681277525782255, |
|
"grad_norm": 0.5488588213920593, |
|
"learning_rate": 8.17531305903399e-05, |
|
"loss": 0.5845, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 2.0032728910171222, |
|
"grad_norm": 0.49017634987831116, |
|
"learning_rate": 8.139534883720931e-05, |
|
"loss": 0.59, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 2.038418029456019, |
|
"grad_norm": 0.5274912118911743, |
|
"learning_rate": 8.103756708407871e-05, |
|
"loss": 0.4667, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 2.0735631678949162, |
|
"grad_norm": 1.4353556632995605, |
|
"learning_rate": 8.067978533094812e-05, |
|
"loss": 0.4706, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 2.108708306333813, |
|
"grad_norm": 0.5296390056610107, |
|
"learning_rate": 8.032200357781753e-05, |
|
"loss": 0.4697, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.108708306333813, |
|
"eval_bleu": 44.1342, |
|
"eval_chrf++": 62.5401, |
|
"eval_gen_len": 17.7677, |
|
"eval_loss": 0.6582108736038208, |
|
"eval_runtime": 3425.6987, |
|
"eval_samples_per_second": 2.137, |
|
"eval_steps_per_second": 1.069, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.14385344477271, |
|
"grad_norm": 0.6416345238685608, |
|
"learning_rate": 7.996422182468695e-05, |
|
"loss": 0.4882, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 2.1789985832116066, |
|
"grad_norm": 0.5234227180480957, |
|
"learning_rate": 7.960644007155635e-05, |
|
"loss": 0.4835, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 2.2141437216505038, |
|
"grad_norm": 0.4757489860057831, |
|
"learning_rate": 7.924865831842576e-05, |
|
"loss": 0.4771, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 2.2492888600894005, |
|
"grad_norm": 0.5438205599784851, |
|
"learning_rate": 7.889087656529517e-05, |
|
"loss": 0.4829, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 2.2844339985282973, |
|
"grad_norm": 0.5392005443572998, |
|
"learning_rate": 7.853309481216459e-05, |
|
"loss": 0.474, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.2844339985282973, |
|
"eval_bleu": 44.2923, |
|
"eval_chrf++": 62.5586, |
|
"eval_gen_len": 17.7492, |
|
"eval_loss": 0.6481789350509644, |
|
"eval_runtime": 3422.6338, |
|
"eval_samples_per_second": 2.139, |
|
"eval_steps_per_second": 1.07, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.319579136967194, |
|
"grad_norm": 0.5046759843826294, |
|
"learning_rate": 7.8175313059034e-05, |
|
"loss": 0.4802, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 2.354724275406091, |
|
"grad_norm": 0.49111250042915344, |
|
"learning_rate": 7.78175313059034e-05, |
|
"loss": 0.4916, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 2.389869413844988, |
|
"grad_norm": 0.5712496638298035, |
|
"learning_rate": 7.745974955277281e-05, |
|
"loss": 0.4845, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 2.425014552283885, |
|
"grad_norm": 0.6314510703086853, |
|
"learning_rate": 7.710196779964223e-05, |
|
"loss": 0.484, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 2.4601596907227816, |
|
"grad_norm": 0.6166778802871704, |
|
"learning_rate": 7.674418604651163e-05, |
|
"loss": 0.488, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.4601596907227816, |
|
"eval_bleu": 44.7709, |
|
"eval_chrf++": 62.9298, |
|
"eval_gen_len": 17.7547, |
|
"eval_loss": 0.6452430486679077, |
|
"eval_runtime": 3438.1607, |
|
"eval_samples_per_second": 2.129, |
|
"eval_steps_per_second": 1.065, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.495304829161679, |
|
"grad_norm": 0.5143587589263916, |
|
"learning_rate": 7.638640429338104e-05, |
|
"loss": 0.4875, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 2.5304499676005756, |
|
"grad_norm": 0.5172815322875977, |
|
"learning_rate": 7.602862254025045e-05, |
|
"loss": 0.4805, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 2.5655951060394724, |
|
"grad_norm": 0.49376818537712097, |
|
"learning_rate": 7.567084078711986e-05, |
|
"loss": 0.488, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 2.600740244478369, |
|
"grad_norm": 0.5714296102523804, |
|
"learning_rate": 7.531305903398927e-05, |
|
"loss": 0.4893, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 2.635885382917266, |
|
"grad_norm": 0.47455132007598877, |
|
"learning_rate": 7.495527728085868e-05, |
|
"loss": 0.4767, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 2.635885382917266, |
|
"eval_bleu": 44.8961, |
|
"eval_chrf++": 63.0641, |
|
"eval_gen_len": 17.7427, |
|
"eval_loss": 0.6402289867401123, |
|
"eval_runtime": 3447.7653, |
|
"eval_samples_per_second": 2.123, |
|
"eval_steps_per_second": 1.062, |
|
"step": 7500 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 28450, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 1500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.268447098428719e+18, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|