{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 189,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.015873015873015872,
      "grad_norm": 8.48122634477978,
      "learning_rate": 2.6315789473684208e-08,
      "logits/chosen": -1.015625,
      "logits/rejected": -1.390625,
      "logps/chosen": -45.5,
      "logps/rejected": -80.5,
      "loss": 0.6914,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.15873015873015872,
      "grad_norm": 7.666414237190791,
      "learning_rate": 2.631578947368421e-07,
      "logits/chosen": -1.1953125,
      "logits/rejected": -1.21875,
      "logps/chosen": -55.0,
      "logps/rejected": -57.0,
      "loss": 0.6925,
      "rewards/accuracies": 0.0833333358168602,
      "rewards/chosen": -0.00017452239990234375,
      "rewards/margins": -0.00244140625,
      "rewards/rejected": 0.00225830078125,
      "step": 10
    },
    {
      "epoch": 0.31746031746031744,
      "grad_norm": 6.673773390918632,
      "learning_rate": 4.970588235294118e-07,
      "logits/chosen": -1.09375,
      "logits/rejected": -1.21875,
      "logps/chosen": -53.75,
      "logps/rejected": -56.75,
      "loss": 0.69,
      "rewards/accuracies": 0.20000000298023224,
      "rewards/chosen": -0.01470947265625,
      "rewards/margins": 0.0024871826171875,
      "rewards/rejected": -0.0172119140625,
      "step": 20
    },
    {
      "epoch": 0.47619047619047616,
      "grad_norm": 7.379593048881886,
      "learning_rate": 4.676470588235294e-07,
      "logits/chosen": -1.1796875,
      "logits/rejected": -1.1015625,
      "logps/chosen": -60.0,
      "logps/rejected": -57.0,
      "loss": 0.6804,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.06689453125,
      "rewards/margins": 0.030029296875,
      "rewards/rejected": -0.09716796875,
      "step": 30
    },
    {
      "epoch": 0.6349206349206349,
      "grad_norm": 9.322532970919411,
      "learning_rate": 4.38235294117647e-07,
      "logits/chosen": -1.3359375,
      "logits/rejected": -1.265625,
      "logps/chosen": -63.25,
      "logps/rejected": -56.5,
      "loss": 0.661,
      "rewards/accuracies": 0.4124999940395355,
      "rewards/chosen": -0.0185546875,
      "rewards/margins": 0.06494140625,
      "rewards/rejected": -0.08349609375,
      "step": 40
    },
    {
      "epoch": 0.7936507936507936,
      "grad_norm": 7.879751058232231,
      "learning_rate": 4.0882352941176465e-07,
      "logits/chosen": -1.171875,
      "logits/rejected": -1.21875,
      "logps/chosen": -63.5,
      "logps/rejected": -55.5,
      "loss": 0.6611,
      "rewards/accuracies": 0.375,
      "rewards/chosen": 0.0830078125,
      "rewards/margins": 0.1220703125,
      "rewards/rejected": -0.0390625,
      "step": 50
    },
    {
      "epoch": 0.9523809523809523,
      "grad_norm": 7.002381076132743,
      "learning_rate": 3.7941176470588235e-07,
      "logits/chosen": -1.25,
      "logits/rejected": -1.2265625,
      "logps/chosen": -62.5,
      "logps/rejected": -56.25,
      "loss": 0.645,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": 0.1005859375,
      "rewards/margins": 0.12451171875,
      "rewards/rejected": -0.024169921875,
      "step": 60
    },
    {
      "epoch": 1.0,
      "eval_logits/chosen": -1.2578125,
      "eval_logits/rejected": -1.171875,
      "eval_logps/chosen": -65.0,
      "eval_logps/rejected": -57.25,
      "eval_loss": 0.6340625286102295,
      "eval_rewards/accuracies": 0.3928571343421936,
      "eval_rewards/chosen": 0.02734375,
      "eval_rewards/margins": 0.1787109375,
      "eval_rewards/rejected": -0.1513671875,
      "eval_runtime": 12.2628,
      "eval_samples_per_second": 16.309,
      "eval_steps_per_second": 0.571,
      "step": 63
    },
    {
      "epoch": 1.1111111111111112,
      "grad_norm": 7.403427316810219,
      "learning_rate": 3.5e-07,
      "logits/chosen": -1.203125,
      "logits/rejected": -1.2109375,
      "logps/chosen": -58.75,
      "logps/rejected": -57.75,
      "loss": 0.6193,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": 0.044677734375,
      "rewards/margins": 0.1630859375,
      "rewards/rejected": -0.119140625,
      "step": 70
    },
    {
      "epoch": 1.2698412698412698,
      "grad_norm": 7.698447091140595,
      "learning_rate": 3.205882352941177e-07,
      "logits/chosen": -1.1015625,
      "logits/rejected": -1.1640625,
      "logps/chosen": -56.5,
      "logps/rejected": -57.75,
      "loss": 0.5965,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": -0.035400390625,
      "rewards/margins": 0.232421875,
      "rewards/rejected": -0.26953125,
      "step": 80
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 5.644436155568419,
      "learning_rate": 2.911764705882353e-07,
      "logits/chosen": -1.25,
      "logits/rejected": -1.234375,
      "logps/chosen": -57.25,
      "logps/rejected": -58.75,
      "loss": 0.5885,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": 0.0103759765625,
      "rewards/margins": 0.29296875,
      "rewards/rejected": -0.283203125,
      "step": 90
    },
    {
      "epoch": 1.5873015873015874,
      "grad_norm": 7.133267052353533,
      "learning_rate": 2.6176470588235295e-07,
      "logits/chosen": -1.15625,
      "logits/rejected": -1.296875,
      "logps/chosen": -60.5,
      "logps/rejected": -61.5,
      "loss": 0.5796,
      "rewards/accuracies": 0.4124999940395355,
      "rewards/chosen": -0.025146484375,
      "rewards/margins": 0.3671875,
      "rewards/rejected": -0.392578125,
      "step": 100
    },
    {
      "epoch": 1.746031746031746,
      "grad_norm": 3.8242948995128176,
      "learning_rate": 2.323529411764706e-07,
      "logits/chosen": -1.0234375,
      "logits/rejected": -1.171875,
      "logps/chosen": -61.75,
      "logps/rejected": -62.5,
      "loss": 0.5951,
      "rewards/accuracies": 0.42500001192092896,
      "rewards/chosen": -0.0732421875,
      "rewards/margins": 0.318359375,
      "rewards/rejected": -0.390625,
      "step": 110
    },
    {
      "epoch": 1.9047619047619047,
      "grad_norm": 5.60510875244782,
      "learning_rate": 2.0294117647058823e-07,
      "logits/chosen": -1.1328125,
      "logits/rejected": -1.2265625,
      "logps/chosen": -61.5,
      "logps/rejected": -67.0,
      "loss": 0.5691,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": -0.12451171875,
      "rewards/margins": 0.35546875,
      "rewards/rejected": -0.478515625,
      "step": 120
    },
    {
      "epoch": 2.0,
      "eval_logits/chosen": -1.21875,
      "eval_logits/rejected": -1.109375,
      "eval_logps/chosen": -66.5,
      "eval_logps/rejected": -60.0,
      "eval_loss": 0.6115624904632568,
      "eval_rewards/accuracies": 0.3928571343421936,
      "eval_rewards/chosen": -0.11279296875,
      "eval_rewards/margins": 0.30078125,
      "eval_rewards/rejected": -0.4140625,
      "eval_runtime": 14.9499,
      "eval_samples_per_second": 13.378,
      "eval_steps_per_second": 0.468,
      "step": 126
    },
    {
      "epoch": 2.0634920634920633,
      "grad_norm": 4.8582765968746875,
      "learning_rate": 1.7352941176470587e-07,
      "logits/chosen": -1.265625,
      "logits/rejected": -1.1875,
      "logps/chosen": -59.5,
      "logps/rejected": -65.0,
      "loss": 0.5577,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": -0.002899169921875,
      "rewards/margins": 0.408203125,
      "rewards/rejected": -0.41015625,
      "step": 130
    },
    {
      "epoch": 2.2222222222222223,
      "grad_norm": 3.9951840250758166,
      "learning_rate": 1.441176470588235e-07,
      "logits/chosen": -1.171875,
      "logits/rejected": -1.15625,
      "logps/chosen": -57.75,
      "logps/rejected": -58.0,
      "loss": 0.5505,
      "rewards/accuracies": 0.36250001192092896,
      "rewards/chosen": -0.052978515625,
      "rewards/margins": 0.33203125,
      "rewards/rejected": -0.38671875,
      "step": 140
    },
    {
      "epoch": 2.380952380952381,
      "grad_norm": 6.94549887559355,
      "learning_rate": 1.1470588235294116e-07,
      "logits/chosen": -1.21875,
      "logits/rejected": -1.109375,
      "logps/chosen": -66.0,
      "logps/rejected": -57.75,
      "loss": 0.5602,
      "rewards/accuracies": 0.4625000059604645,
      "rewards/chosen": -0.048095703125,
      "rewards/margins": 0.3984375,
      "rewards/rejected": -0.4453125,
      "step": 150
    },
    {
      "epoch": 2.5396825396825395,
      "grad_norm": 4.543691810100308,
      "learning_rate": 8.529411764705883e-08,
      "logits/chosen": -1.1484375,
      "logits/rejected": -1.3046875,
      "logps/chosen": -61.25,
      "logps/rejected": -65.0,
      "loss": 0.5427,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -0.11962890625,
      "rewards/margins": 0.53515625,
      "rewards/rejected": -0.65234375,
      "step": 160
    },
    {
      "epoch": 2.6984126984126986,
      "grad_norm": 8.364914220576363,
      "learning_rate": 5.5882352941176474e-08,
      "logits/chosen": -1.171875,
      "logits/rejected": -1.15625,
      "logps/chosen": -57.0,
      "logps/rejected": -66.0,
      "loss": 0.5554,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": -0.1552734375,
      "rewards/margins": 0.412109375,
      "rewards/rejected": -0.56640625,
      "step": 170
    },
    {
      "epoch": 2.857142857142857,
      "grad_norm": 5.985657271989901,
      "learning_rate": 2.6470588235294116e-08,
      "logits/chosen": -0.96875,
      "logits/rejected": -1.125,
      "logps/chosen": -61.25,
      "logps/rejected": -62.0,
      "loss": 0.5432,
      "rewards/accuracies": 0.44999998807907104,
      "rewards/chosen": -0.047607421875,
      "rewards/margins": 0.48828125,
      "rewards/rejected": -0.53515625,
      "step": 180
    },
    {
      "epoch": 3.0,
      "eval_logits/chosen": -1.1953125,
      "eval_logits/rejected": -1.0703125,
      "eval_logps/chosen": -67.5,
      "eval_logps/rejected": -61.25,
      "eval_loss": 0.6068750023841858,
      "eval_rewards/accuracies": 0.375,
      "eval_rewards/chosen": -0.2021484375,
      "eval_rewards/margins": 0.34375,
      "eval_rewards/rejected": -0.546875,
      "eval_runtime": 14.9623,
      "eval_samples_per_second": 13.367,
      "eval_steps_per_second": 0.468,
      "step": 189
    },
    {
      "epoch": 3.0,
      "step": 189,
      "total_flos": 0.0,
      "train_loss": 0.601676173941799,
      "train_runtime": 1666.5095,
      "train_samples_per_second": 3.6,
      "train_steps_per_second": 0.113
    }
  ],
"logging_steps": 10, |
|
"max_steps": 189, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|