gemma-2-2b-it-WriteMyPaper / trainer_state.json

Add Fine-Tuned WriteMyPaper Model

f3f5fee 5 months ago

9.52 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 0.06940251375904835,
	"eval_steps": 500,
	"global_step": 5000,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.001388050275180967,
	"grad_norm": 1.386236310005188,
	"learning_rate": 0.00013333333333333334,
	"loss": 2.432,
	"step": 100
	},
	{
	"epoch": 0.002776100550361934,
	"grad_norm": 1.2055169343948364,
	"learning_rate": 0.00019793814432989693,
	"loss": 1.8849,
	"step": 200
	},
	{
	"epoch": 0.0041641508255429015,
	"grad_norm": 1.1103655099868774,
	"learning_rate": 0.00019381443298969073,
	"loss": 1.7944,
	"step": 300
	},
	{
	"epoch": 0.005552201100723868,
	"grad_norm": 1.1928976774215698,
	"learning_rate": 0.00018969072164948454,
	"loss": 1.7832,
	"step": 400
	},
	{
	"epoch": 0.006940251375904836,
	"grad_norm": 1.2889844179153442,
	"learning_rate": 0.00018556701030927837,
	"loss": 1.811,
	"step": 500
	},
	{
	"epoch": 0.008328301651085803,
	"grad_norm": 1.304612159729004,
	"learning_rate": 0.00018144329896907217,
	"loss": 1.7718,
	"step": 600
	},
	{
	"epoch": 0.00971635192626677,
	"grad_norm": 1.2339240312576294,
	"learning_rate": 0.00017731958762886598,
	"loss": 1.7757,
	"step": 700
	},
	{
	"epoch": 0.011104402201447736,
	"grad_norm": 1.196730136871338,
	"learning_rate": 0.0001731958762886598,
	"loss": 1.7639,
	"step": 800
	},
	{
	"epoch": 0.012492452476628703,
	"grad_norm": 1.1369080543518066,
	"learning_rate": 0.00016907216494845361,
	"loss": 1.7508,
	"step": 900
	},
	{
	"epoch": 0.013880502751809671,
	"grad_norm": 1.1714072227478027,
	"learning_rate": 0.00016494845360824742,
	"loss": 1.7451,
	"step": 1000
	},
	{
	"epoch": 0.015268553026990638,
	"grad_norm": 2.0464041233062744,
	"learning_rate": 0.00016082474226804125,
	"loss": 1.7266,
	"step": 1100
	},
	{
	"epoch": 0.016656603302171606,
	"grad_norm": 1.2468883991241455,
	"learning_rate": 0.00015670103092783506,
	"loss": 1.7476,
	"step": 1200
	},
	{
	"epoch": 0.018044653577352573,
	"grad_norm": 1.3278380632400513,
	"learning_rate": 0.00015257731958762886,
	"loss": 1.713,
	"step": 1300
	},
	{
	"epoch": 0.01943270385253354,
	"grad_norm": 1.83975088596344,
	"learning_rate": 0.0001484536082474227,
	"loss": 1.712,
	"step": 1400
	},
	{
	"epoch": 0.020820754127714506,
	"grad_norm": 1.1055878400802612,
	"learning_rate": 0.0001443298969072165,
	"loss": 1.7305,
	"step": 1500
	},
	{
	"epoch": 0.022208804402895473,
	"grad_norm": 1.2078220844268799,
	"learning_rate": 0.0001402061855670103,
	"loss": 1.7094,
	"step": 1600
	},
	{
	"epoch": 0.02359685467807644,
	"grad_norm": 1.9538626670837402,
	"learning_rate": 0.00013608247422680414,
	"loss": 1.7252,
	"step": 1700
	},
	{
	"epoch": 0.024984904953257406,
	"grad_norm": 1.149594783782959,
	"learning_rate": 0.00013195876288659794,
	"loss": 1.7108,
	"step": 1800
	},
	{
	"epoch": 0.026372955228438372,
	"grad_norm": 1.2434228658676147,
	"learning_rate": 0.00012783505154639175,
	"loss": 1.7106,
	"step": 1900
	},
	{
	"epoch": 0.027761005503619342,
	"grad_norm": 1.0074732303619385,
	"learning_rate": 0.00012371134020618558,
	"loss": 1.7222,
	"step": 2000
	},
	{
	"epoch": 0.02914905577880031,
	"grad_norm": 1.2611148357391357,
	"learning_rate": 0.00011958762886597938,
	"loss": 1.6937,
	"step": 2100
	},
	{
	"epoch": 0.030537106053981276,
	"grad_norm": 1.0606039762496948,
	"learning_rate": 0.00011546391752577319,
	"loss": 1.6904,
	"step": 2200
	},
	{
	"epoch": 0.03192515632916224,
	"grad_norm": 1.202054500579834,
	"learning_rate": 0.00011134020618556702,
	"loss": 1.7305,
	"step": 2300
	},
	{
	"epoch": 0.03331320660434321,
	"grad_norm": 0.9994720220565796,
	"learning_rate": 0.00010721649484536083,
	"loss": 1.7045,
	"step": 2400
	},
	{
	"epoch": 0.034701256879524175,
	"grad_norm": 1.222708821296692,
	"learning_rate": 0.00010309278350515463,
	"loss": 1.6707,
	"step": 2500
	},
	{
	"epoch": 0.036089307154705146,
	"grad_norm": 1.170048475265503,
	"learning_rate": 9.896907216494846e-05,
	"loss": 1.6999,
	"step": 2600
	},
	{
	"epoch": 0.03747735742988611,
	"grad_norm": 1.3302826881408691,
	"learning_rate": 9.484536082474227e-05,
	"loss": 1.6899,
	"step": 2700
	},
	{
	"epoch": 0.03886540770506708,
	"grad_norm": 1.1185508966445923,
	"learning_rate": 9.072164948453609e-05,
	"loss": 1.6899,
	"step": 2800
	},
	{
	"epoch": 0.04025345798024804,
	"grad_norm": 1.6227563619613647,
	"learning_rate": 8.65979381443299e-05,
	"loss": 1.6844,
	"step": 2900
	},
	{
	"epoch": 0.04164150825542901,
	"grad_norm": 1.4536927938461304,
	"learning_rate": 8.247422680412371e-05,
	"loss": 1.6873,
	"step": 3000
	},
	{
	"epoch": 0.04302955853060998,
	"grad_norm": 1.2233431339263916,
	"learning_rate": 7.835051546391753e-05,
	"loss": 1.656,
	"step": 3100
	},
	{
	"epoch": 0.044417608805790945,
	"grad_norm": 1.3101099729537964,
	"learning_rate": 7.422680412371135e-05,
	"loss": 1.6722,
	"step": 3200
	},
	{
	"epoch": 0.045805659080971915,
	"grad_norm": 1.224885106086731,
	"learning_rate": 7.010309278350515e-05,
	"loss": 1.6536,
	"step": 3300
	},
	{
	"epoch": 0.04719370935615288,
	"grad_norm": 1.1796605587005615,
	"learning_rate": 6.597938144329897e-05,
	"loss": 1.6783,
	"step": 3400
	},
	{
	"epoch": 0.04858175963133385,
	"grad_norm": 1.2505239248275757,
	"learning_rate": 6.185567010309279e-05,
	"loss": 1.676,
	"step": 3500
	},
	{
	"epoch": 0.04996980990651481,
	"grad_norm": 0.9648654460906982,
	"learning_rate": 5.7731958762886594e-05,
	"loss": 1.6617,
	"step": 3600
	},
	{
	"epoch": 0.05135786018169578,
	"grad_norm": 1.0220248699188232,
	"learning_rate": 5.360824742268041e-05,
	"loss": 1.6611,
	"step": 3700
	},
	{
	"epoch": 0.052745910456876745,
	"grad_norm": 1.0856778621673584,
	"learning_rate": 4.948453608247423e-05,
	"loss": 1.6608,
	"step": 3800
	},
	{
	"epoch": 0.054133960732057715,
	"grad_norm": 1.439794659614563,
	"learning_rate": 4.536082474226804e-05,
	"loss": 1.6693,
	"step": 3900
	},
	{
	"epoch": 0.055522011007238685,
	"grad_norm": 1.1624224185943604,
	"learning_rate": 4.1237113402061855e-05,
	"loss": 1.6447,
	"step": 4000
	},
	{
	"epoch": 0.05691006128241965,
	"grad_norm": 1.0445908308029175,
	"learning_rate": 3.7113402061855674e-05,
	"loss": 1.6458,
	"step": 4100
	},
	{
	"epoch": 0.05829811155760062,
	"grad_norm": 1.2009379863739014,
	"learning_rate": 3.2989690721649485e-05,
	"loss": 1.6362,
	"step": 4200
	},
	{
	"epoch": 0.05968616183278158,
	"grad_norm": 1.1339406967163086,
	"learning_rate": 2.8865979381443297e-05,
	"loss": 1.6605,
	"step": 4300
	},
	{
	"epoch": 0.06107421210796255,
	"grad_norm": 1.1409072875976562,
	"learning_rate": 2.4742268041237116e-05,
	"loss": 1.6582,
	"step": 4400
	},
	{
	"epoch": 0.062462262383143514,
	"grad_norm": 1.0138684511184692,
	"learning_rate": 2.0618556701030927e-05,
	"loss": 1.6539,
	"step": 4500
	},
	{
	"epoch": 0.06385031265832448,
	"grad_norm": 1.0418405532836914,
	"learning_rate": 1.6494845360824743e-05,
	"loss": 1.6676,
	"step": 4600
	},
	{
	"epoch": 0.06523836293350545,
	"grad_norm": 1.0475600957870483,
	"learning_rate": 1.2371134020618558e-05,
	"loss": 1.6416,
	"step": 4700
	},
	{
	"epoch": 0.06662641320868642,
	"grad_norm": 1.334047555923462,
	"learning_rate": 8.247422680412371e-06,
	"loss": 1.6297,
	"step": 4800
	},
	{
	"epoch": 0.06801446348386739,
	"grad_norm": 1.0327249765396118,
	"learning_rate": 4.123711340206186e-06,
	"loss": 1.6329,
	"step": 4900
	},
	{
	"epoch": 0.06940251375904835,
	"grad_norm": 1.007521390914917,
	"learning_rate": 0.0,
	"loss": 1.6405,
	"step": 5000
	}
	],
	"logging_steps": 100,
	"max_steps": 5000,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 1,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 1.1538290661520589e+17,
	"train_batch_size": 1,
	"trial_name": null,
	"trial_params": null
	}