{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.971563981042654,
  "eval_steps": 100,
  "global_step": 104,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "grad_norm": 138.6888657503535,
      "learning_rate": 4.545454545454545e-08,
      "logits/chosen": 111.1170883178711,
      "logits/rejected": 86.76590728759766,
      "logps/chosen": -327.55072021484375,
      "logps/rejected": -328.69842529296875,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.19,
      "grad_norm": 143.62478348270474,
      "learning_rate": 4.545454545454545e-07,
      "logits/chosen": 110.3434066772461,
      "logits/rejected": 133.22720336914062,
      "logps/chosen": -350.7154541015625,
      "logps/rejected": -434.24615478515625,
      "loss": 0.6995,
      "rewards/accuracies": 0.5069444179534912,
      "rewards/chosen": 0.1381588876247406,
      "rewards/margins": 0.09099400788545609,
      "rewards/rejected": 0.047164879739284515,
      "step": 10
    },
    {
      "epoch": 0.38,
      "grad_norm": 116.88419639658747,
      "learning_rate": 4.885348141000122e-07,
      "logits/chosen": 117.505126953125,
      "logits/rejected": 128.30625915527344,
      "logps/chosen": -331.0508728027344,
      "logps/rejected": -407.0337829589844,
      "loss": 0.6135,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.208949014544487,
      "rewards/margins": 0.6625346541404724,
      "rewards/rejected": -0.4535856246948242,
      "step": 20
    },
    {
      "epoch": 0.57,
      "grad_norm": 105.11935497215215,
      "learning_rate": 4.5025027361734613e-07,
      "logits/chosen": 115.24714660644531,
      "logits/rejected": 119.91346740722656,
      "logps/chosen": -394.3634338378906,
      "logps/rejected": -469.9334411621094,
      "loss": 0.5933,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.4847453832626343,
      "rewards/margins": 1.0526559352874756,
      "rewards/rejected": -2.5374011993408203,
      "step": 30
    },
    {
      "epoch": 0.76,
      "grad_norm": 108.46808872087831,
      "learning_rate": 3.893311157806091e-07,
      "logits/chosen": 118.00090026855469,
      "logits/rejected": 112.75251770019531,
      "logps/chosen": -422.1764221191406,
      "logps/rejected": -456.24334716796875,
      "loss": 0.5401,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -1.976963758468628,
      "rewards/margins": 0.8569629788398743,
      "rewards/rejected": -2.8339266777038574,
      "step": 40
    },
    {
      "epoch": 0.95,
      "grad_norm": 123.46838634445918,
      "learning_rate": 3.126631330646801e-07,
      "logits/chosen": 125.94873046875,
      "logits/rejected": 127.3397445678711,
      "logps/chosen": -429.53900146484375,
      "logps/rejected": -464.96337890625,
      "loss": 0.5176,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -1.8666656017303467,
      "rewards/margins": 1.3108278512954712,
      "rewards/rejected": -3.1774933338165283,
      "step": 50
    },
    {
      "epoch": 1.14,
      "grad_norm": 59.27830892615387,
      "learning_rate": 2.2891223348923882e-07,
      "logits/chosen": 125.06622314453125,
      "logits/rejected": 128.73312377929688,
      "logps/chosen": -400.2764892578125,
      "logps/rejected": -454.91912841796875,
      "loss": 0.2816,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -1.6803643703460693,
      "rewards/margins": 2.398113250732422,
      "rewards/rejected": -4.078477382659912,
      "step": 60
    },
    {
      "epoch": 1.33,
      "grad_norm": 51.38072600603581,
      "learning_rate": 1.4754491880085317e-07,
      "logits/chosen": 119.95808410644531,
      "logits/rejected": 122.0335922241211,
      "logps/chosen": -374.8177185058594,
      "logps/rejected": -501.3230895996094,
      "loss": 0.193,
      "rewards/accuracies": 0.90625,
      "rewards/chosen": -1.81565260887146,
      "rewards/margins": 2.6209537982940674,
      "rewards/rejected": -4.436606407165527,
      "step": 70
    },
    {
      "epoch": 1.52,
      "grad_norm": 45.51202655438821,
      "learning_rate": 7.775827023107834e-08,
      "logits/chosen": 126.967529296875,
      "logits/rejected": 128.75454711914062,
      "logps/chosen": -437.3567810058594,
      "logps/rejected": -533.428955078125,
      "loss": 0.1709,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -1.8041622638702393,
      "rewards/margins": 2.979466199874878,
      "rewards/rejected": -4.783627986907959,
      "step": 80
    },
    {
      "epoch": 1.71,
      "grad_norm": 63.648512488393656,
      "learning_rate": 2.7440387297912122e-08,
      "logits/chosen": 110.33589172363281,
      "logits/rejected": 114.97691345214844,
      "logps/chosen": -413.44091796875,
      "logps/rejected": -499.026611328125,
      "loss": 0.1793,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -2.1890602111816406,
      "rewards/margins": 3.0602707862854004,
      "rewards/rejected": -5.249330997467041,
      "step": 90
    },
    {
      "epoch": 1.9,
      "grad_norm": 63.82102776580896,
      "learning_rate": 2.27878296044029e-09,
      "logits/chosen": 126.34486389160156,
      "logits/rejected": 116.85221099853516,
      "logps/chosen": -431.28375244140625,
      "logps/rejected": -541.4762573242188,
      "loss": 0.1919,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": -2.124760150909424,
      "rewards/margins": 3.1830201148986816,
      "rewards/rejected": -5.307779788970947,
      "step": 100
    },
    {
      "epoch": 1.9,
      "eval_logits/chosen": 95.65170288085938,
      "eval_logits/rejected": 89.61892700195312,
      "eval_logps/chosen": -420.1343994140625,
      "eval_logps/rejected": -449.6919860839844,
      "eval_loss": 0.4766612648963928,
      "eval_rewards/accuracies": 0.71875,
      "eval_rewards/chosen": -2.8813698291778564,
      "eval_rewards/margins": 1.5896581411361694,
      "eval_rewards/rejected": -4.471027851104736,
      "eval_runtime": 46.5611,
      "eval_samples_per_second": 16.108,
      "eval_steps_per_second": 0.515,
      "step": 100
    },
    {
      "epoch": 1.97,
      "step": 104,
      "total_flos": 0.0,
      "train_loss": 0.38778070876231563,
      "train_runtime": 1146.4695,
      "train_samples_per_second": 11.775,
      "train_steps_per_second": 0.091
    }
  ],
  "logging_steps": 10,
  "max_steps": 104,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}