{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.06940251375904835,
"eval_steps": 500,
"global_step": 5000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.001388050275180967,
"grad_norm": 1.386236310005188,
"learning_rate": 0.00013333333333333334,
"loss": 2.432,
"step": 100
},
{
"epoch": 0.002776100550361934,
"grad_norm": 1.2055169343948364,
"learning_rate": 0.00019793814432989693,
"loss": 1.8849,
"step": 200
},
{
"epoch": 0.0041641508255429015,
"grad_norm": 1.1103655099868774,
"learning_rate": 0.00019381443298969073,
"loss": 1.7944,
"step": 300
},
{
"epoch": 0.005552201100723868,
"grad_norm": 1.1928976774215698,
"learning_rate": 0.00018969072164948454,
"loss": 1.7832,
"step": 400
},
{
"epoch": 0.006940251375904836,
"grad_norm": 1.2889844179153442,
"learning_rate": 0.00018556701030927837,
"loss": 1.811,
"step": 500
},
{
"epoch": 0.008328301651085803,
"grad_norm": 1.304612159729004,
"learning_rate": 0.00018144329896907217,
"loss": 1.7718,
"step": 600
},
{
"epoch": 0.00971635192626677,
"grad_norm": 1.2339240312576294,
"learning_rate": 0.00017731958762886598,
"loss": 1.7757,
"step": 700
},
{
"epoch": 0.011104402201447736,
"grad_norm": 1.196730136871338,
"learning_rate": 0.0001731958762886598,
"loss": 1.7639,
"step": 800
},
{
"epoch": 0.012492452476628703,
"grad_norm": 1.1369080543518066,
"learning_rate": 0.00016907216494845361,
"loss": 1.7508,
"step": 900
},
{
"epoch": 0.013880502751809671,
"grad_norm": 1.1714072227478027,
"learning_rate": 0.00016494845360824742,
"loss": 1.7451,
"step": 1000
},
{
"epoch": 0.015268553026990638,
"grad_norm": 2.0464041233062744,
"learning_rate": 0.00016082474226804125,
"loss": 1.7266,
"step": 1100
},
{
"epoch": 0.016656603302171606,
"grad_norm": 1.2468883991241455,
"learning_rate": 0.00015670103092783506,
"loss": 1.7476,
"step": 1200
},
{
"epoch": 0.018044653577352573,
"grad_norm": 1.3278380632400513,
"learning_rate": 0.00015257731958762886,
"loss": 1.713,
"step": 1300
},
{
"epoch": 0.01943270385253354,
"grad_norm": 1.83975088596344,
"learning_rate": 0.0001484536082474227,
"loss": 1.712,
"step": 1400
},
{
"epoch": 0.020820754127714506,
"grad_norm": 1.1055878400802612,
"learning_rate": 0.0001443298969072165,
"loss": 1.7305,
"step": 1500
},
{
"epoch": 0.022208804402895473,
"grad_norm": 1.2078220844268799,
"learning_rate": 0.0001402061855670103,
"loss": 1.7094,
"step": 1600
},
{
"epoch": 0.02359685467807644,
"grad_norm": 1.9538626670837402,
"learning_rate": 0.00013608247422680414,
"loss": 1.7252,
"step": 1700
},
{
"epoch": 0.024984904953257406,
"grad_norm": 1.149594783782959,
"learning_rate": 0.00013195876288659794,
"loss": 1.7108,
"step": 1800
},
{
"epoch": 0.026372955228438372,
"grad_norm": 1.2434228658676147,
"learning_rate": 0.00012783505154639175,
"loss": 1.7106,
"step": 1900
},
{
"epoch": 0.027761005503619342,
"grad_norm": 1.0074732303619385,
"learning_rate": 0.00012371134020618558,
"loss": 1.7222,
"step": 2000
},
{
"epoch": 0.02914905577880031,
"grad_norm": 1.2611148357391357,
"learning_rate": 0.00011958762886597938,
"loss": 1.6937,
"step": 2100
},
{
"epoch": 0.030537106053981276,
"grad_norm": 1.0606039762496948,
"learning_rate": 0.00011546391752577319,
"loss": 1.6904,
"step": 2200
},
{
"epoch": 0.03192515632916224,
"grad_norm": 1.202054500579834,
"learning_rate": 0.00011134020618556702,
"loss": 1.7305,
"step": 2300
},
{
"epoch": 0.03331320660434321,
"grad_norm": 0.9994720220565796,
"learning_rate": 0.00010721649484536083,
"loss": 1.7045,
"step": 2400
},
{
"epoch": 0.034701256879524175,
"grad_norm": 1.222708821296692,
"learning_rate": 0.00010309278350515463,
"loss": 1.6707,
"step": 2500
},
{
"epoch": 0.036089307154705146,
"grad_norm": 1.170048475265503,
"learning_rate": 9.896907216494846e-05,
"loss": 1.6999,
"step": 2600
},
{
"epoch": 0.03747735742988611,
"grad_norm": 1.3302826881408691,
"learning_rate": 9.484536082474227e-05,
"loss": 1.6899,
"step": 2700
},
{
"epoch": 0.03886540770506708,
"grad_norm": 1.1185508966445923,
"learning_rate": 9.072164948453609e-05,
"loss": 1.6899,
"step": 2800
},
{
"epoch": 0.04025345798024804,
"grad_norm": 1.6227563619613647,
"learning_rate": 8.65979381443299e-05,
"loss": 1.6844,
"step": 2900
},
{
"epoch": 0.04164150825542901,
"grad_norm": 1.4536927938461304,
"learning_rate": 8.247422680412371e-05,
"loss": 1.6873,
"step": 3000
},
{
"epoch": 0.04302955853060998,
"grad_norm": 1.2233431339263916,
"learning_rate": 7.835051546391753e-05,
"loss": 1.656,
"step": 3100
},
{
"epoch": 0.044417608805790945,
"grad_norm": 1.3101099729537964,
"learning_rate": 7.422680412371135e-05,
"loss": 1.6722,
"step": 3200
},
{
"epoch": 0.045805659080971915,
"grad_norm": 1.224885106086731,
"learning_rate": 7.010309278350515e-05,
"loss": 1.6536,
"step": 3300
},
{
"epoch": 0.04719370935615288,
"grad_norm": 1.1796605587005615,
"learning_rate": 6.597938144329897e-05,
"loss": 1.6783,
"step": 3400
},
{
"epoch": 0.04858175963133385,
"grad_norm": 1.2505239248275757,
"learning_rate": 6.185567010309279e-05,
"loss": 1.676,
"step": 3500
},
{
"epoch": 0.04996980990651481,
"grad_norm": 0.9648654460906982,
"learning_rate": 5.7731958762886594e-05,
"loss": 1.6617,
"step": 3600
},
{
"epoch": 0.05135786018169578,
"grad_norm": 1.0220248699188232,
"learning_rate": 5.360824742268041e-05,
"loss": 1.6611,
"step": 3700
},
{
"epoch": 0.052745910456876745,
"grad_norm": 1.0856778621673584,
"learning_rate": 4.948453608247423e-05,
"loss": 1.6608,
"step": 3800
},
{
"epoch": 0.054133960732057715,
"grad_norm": 1.439794659614563,
"learning_rate": 4.536082474226804e-05,
"loss": 1.6693,
"step": 3900
},
{
"epoch": 0.055522011007238685,
"grad_norm": 1.1624224185943604,
"learning_rate": 4.1237113402061855e-05,
"loss": 1.6447,
"step": 4000
},
{
"epoch": 0.05691006128241965,
"grad_norm": 1.0445908308029175,
"learning_rate": 3.7113402061855674e-05,
"loss": 1.6458,
"step": 4100
},
{
"epoch": 0.05829811155760062,
"grad_norm": 1.2009379863739014,
"learning_rate": 3.2989690721649485e-05,
"loss": 1.6362,
"step": 4200
},
{
"epoch": 0.05968616183278158,
"grad_norm": 1.1339406967163086,
"learning_rate": 2.8865979381443297e-05,
"loss": 1.6605,
"step": 4300
},
{
"epoch": 0.06107421210796255,
"grad_norm": 1.1409072875976562,
"learning_rate": 2.4742268041237116e-05,
"loss": 1.6582,
"step": 4400
},
{
"epoch": 0.062462262383143514,
"grad_norm": 1.0138684511184692,
"learning_rate": 2.0618556701030927e-05,
"loss": 1.6539,
"step": 4500
},
{
"epoch": 0.06385031265832448,
"grad_norm": 1.0418405532836914,
"learning_rate": 1.6494845360824743e-05,
"loss": 1.6676,
"step": 4600
},
{
"epoch": 0.06523836293350545,
"grad_norm": 1.0475600957870483,
"learning_rate": 1.2371134020618558e-05,
"loss": 1.6416,
"step": 4700
},
{
"epoch": 0.06662641320868642,
"grad_norm": 1.334047555923462,
"learning_rate": 8.247422680412371e-06,
"loss": 1.6297,
"step": 4800
},
{
"epoch": 0.06801446348386739,
"grad_norm": 1.0327249765396118,
"learning_rate": 4.123711340206186e-06,
"loss": 1.6329,
"step": 4900
},
{
"epoch": 0.06940251375904835,
"grad_norm": 1.007521390914917,
"learning_rate": 0.0,
"loss": 1.6405,
"step": 5000
}
],
"logging_steps": 100,
"max_steps": 5000,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.1538290661520589e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}