{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 189, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015873015873015872, "grad_norm": 8.48122634477978, "learning_rate": 2.6315789473684208e-08, "logits/chosen": -1.015625, "logits/rejected": -1.390625, "logps/chosen": -45.5, "logps/rejected": -80.5, "loss": 0.6914, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.15873015873015872, "grad_norm": 7.666414237190791, "learning_rate": 2.631578947368421e-07, "logits/chosen": -1.1953125, "logits/rejected": -1.21875, "logps/chosen": -55.0, "logps/rejected": -57.0, "loss": 0.6925, "rewards/accuracies": 0.0833333358168602, "rewards/chosen": -0.00017452239990234375, "rewards/margins": -0.00244140625, "rewards/rejected": 0.00225830078125, "step": 10 }, { "epoch": 0.31746031746031744, "grad_norm": 6.673773390918632, "learning_rate": 4.970588235294118e-07, "logits/chosen": -1.09375, "logits/rejected": -1.21875, "logps/chosen": -53.75, "logps/rejected": -56.75, "loss": 0.69, "rewards/accuracies": 0.20000000298023224, "rewards/chosen": -0.01470947265625, "rewards/margins": 0.0024871826171875, "rewards/rejected": -0.0172119140625, "step": 20 }, { "epoch": 0.47619047619047616, "grad_norm": 7.379593048881886, "learning_rate": 4.676470588235294e-07, "logits/chosen": -1.1796875, "logits/rejected": -1.1015625, "logps/chosen": -60.0, "logps/rejected": -57.0, "loss": 0.6804, "rewards/accuracies": 0.375, "rewards/chosen": -0.06689453125, "rewards/margins": 0.030029296875, "rewards/rejected": -0.09716796875, "step": 30 }, { "epoch": 0.6349206349206349, "grad_norm": 9.322532970919411, "learning_rate": 4.38235294117647e-07, "logits/chosen": -1.3359375, "logits/rejected": -1.265625, "logps/chosen": -63.25, "logps/rejected": -56.5, "loss": 0.661, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.0185546875, "rewards/margins": 0.06494140625, "rewards/rejected": -0.08349609375, "step": 40 }, { "epoch": 0.7936507936507936, "grad_norm": 7.879751058232231, "learning_rate": 4.0882352941176465e-07, "logits/chosen": -1.171875, "logits/rejected": -1.21875, "logps/chosen": -63.5, "logps/rejected": -55.5, "loss": 0.6611, "rewards/accuracies": 0.375, "rewards/chosen": 0.0830078125, "rewards/margins": 0.1220703125, "rewards/rejected": -0.0390625, "step": 50 }, { "epoch": 0.9523809523809523, "grad_norm": 7.002381076132743, "learning_rate": 3.7941176470588235e-07, "logits/chosen": -1.25, "logits/rejected": -1.2265625, "logps/chosen": -62.5, "logps/rejected": -56.25, "loss": 0.645, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": 0.1005859375, "rewards/margins": 0.12451171875, "rewards/rejected": -0.024169921875, "step": 60 }, { "epoch": 1.0, "eval_logits/chosen": -1.2578125, "eval_logits/rejected": -1.171875, "eval_logps/chosen": -65.0, "eval_logps/rejected": -57.25, "eval_loss": 0.6340625286102295, "eval_rewards/accuracies": 0.3928571343421936, "eval_rewards/chosen": 0.02734375, "eval_rewards/margins": 0.1787109375, "eval_rewards/rejected": -0.1513671875, "eval_runtime": 12.2628, "eval_samples_per_second": 16.309, "eval_steps_per_second": 0.571, "step": 63 }, { "epoch": 1.1111111111111112, "grad_norm": 7.403427316810219, "learning_rate": 3.5e-07, "logits/chosen": -1.203125, "logits/rejected": -1.2109375, "logps/chosen": -58.75, "logps/rejected": -57.75, "loss": 0.6193, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": 0.044677734375, "rewards/margins": 0.1630859375, "rewards/rejected": -0.119140625, "step": 70 }, { "epoch": 1.2698412698412698, "grad_norm": 7.698447091140595, "learning_rate": 3.205882352941177e-07, "logits/chosen": -1.1015625, "logits/rejected": -1.1640625, "logps/chosen": -56.5, "logps/rejected": -57.75, "loss": 0.5965, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.035400390625, "rewards/margins": 0.232421875, "rewards/rejected": -0.26953125, "step": 80 }, { "epoch": 1.4285714285714286, "grad_norm": 5.644436155568419, "learning_rate": 2.911764705882353e-07, "logits/chosen": -1.25, "logits/rejected": -1.234375, "logps/chosen": -57.25, "logps/rejected": -58.75, "loss": 0.5885, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.0103759765625, "rewards/margins": 0.29296875, "rewards/rejected": -0.283203125, "step": 90 }, { "epoch": 1.5873015873015874, "grad_norm": 7.133267052353533, "learning_rate": 2.6176470588235295e-07, "logits/chosen": -1.15625, "logits/rejected": -1.296875, "logps/chosen": -60.5, "logps/rejected": -61.5, "loss": 0.5796, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.025146484375, "rewards/margins": 0.3671875, "rewards/rejected": -0.392578125, "step": 100 }, { "epoch": 1.746031746031746, "grad_norm": 3.8242948995128176, "learning_rate": 2.323529411764706e-07, "logits/chosen": -1.0234375, "logits/rejected": -1.171875, "logps/chosen": -61.75, "logps/rejected": -62.5, "loss": 0.5951, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.0732421875, "rewards/margins": 0.318359375, "rewards/rejected": -0.390625, "step": 110 }, { "epoch": 1.9047619047619047, "grad_norm": 5.60510875244782, "learning_rate": 2.0294117647058823e-07, "logits/chosen": -1.1328125, "logits/rejected": -1.2265625, "logps/chosen": -61.5, "logps/rejected": -67.0, "loss": 0.5691, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.12451171875, "rewards/margins": 0.35546875, "rewards/rejected": -0.478515625, "step": 120 }, { "epoch": 2.0, "eval_logits/chosen": -1.21875, "eval_logits/rejected": -1.109375, "eval_logps/chosen": -66.5, "eval_logps/rejected": -60.0, "eval_loss": 0.6115624904632568, "eval_rewards/accuracies": 0.3928571343421936, "eval_rewards/chosen": -0.11279296875, "eval_rewards/margins": 0.30078125, "eval_rewards/rejected": -0.4140625, "eval_runtime": 14.9499, "eval_samples_per_second": 13.378, "eval_steps_per_second": 0.468, "step": 126 }, { "epoch": 2.0634920634920633, "grad_norm": 4.8582765968746875, "learning_rate": 1.7352941176470587e-07, "logits/chosen": -1.265625, "logits/rejected": -1.1875, "logps/chosen": -59.5, "logps/rejected": -65.0, "loss": 0.5577, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.002899169921875, "rewards/margins": 0.408203125, "rewards/rejected": -0.41015625, "step": 130 }, { "epoch": 2.2222222222222223, "grad_norm": 3.9951840250758166, "learning_rate": 1.441176470588235e-07, "logits/chosen": -1.171875, "logits/rejected": -1.15625, "logps/chosen": -57.75, "logps/rejected": -58.0, "loss": 0.5505, "rewards/accuracies": 0.36250001192092896, "rewards/chosen": -0.052978515625, "rewards/margins": 0.33203125, "rewards/rejected": -0.38671875, "step": 140 }, { "epoch": 2.380952380952381, "grad_norm": 6.94549887559355, "learning_rate": 1.1470588235294116e-07, "logits/chosen": -1.21875, "logits/rejected": -1.109375, "logps/chosen": -66.0, "logps/rejected": -57.75, "loss": 0.5602, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.048095703125, "rewards/margins": 0.3984375, "rewards/rejected": -0.4453125, "step": 150 }, { "epoch": 2.5396825396825395, "grad_norm": 4.543691810100308, "learning_rate": 8.529411764705883e-08, "logits/chosen": -1.1484375, "logits/rejected": -1.3046875, "logps/chosen": -61.25, "logps/rejected": -65.0, "loss": 0.5427, "rewards/accuracies": 0.4375, "rewards/chosen": -0.11962890625, "rewards/margins": 0.53515625, "rewards/rejected": -0.65234375, "step": 160 }, { "epoch": 2.6984126984126986, "grad_norm": 8.364914220576363, "learning_rate": 5.5882352941176474e-08, "logits/chosen": -1.171875, "logits/rejected": -1.15625, "logps/chosen": -57.0, "logps/rejected": -66.0, "loss": 0.5554, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.1552734375, "rewards/margins": 0.412109375, "rewards/rejected": -0.56640625, "step": 170 }, { "epoch": 2.857142857142857, "grad_norm": 5.985657271989901, "learning_rate": 2.6470588235294116e-08, "logits/chosen": -0.96875, "logits/rejected": -1.125, "logps/chosen": -61.25, "logps/rejected": -62.0, "loss": 0.5432, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.047607421875, "rewards/margins": 0.48828125, "rewards/rejected": -0.53515625, "step": 180 }, { "epoch": 3.0, "eval_logits/chosen": -1.1953125, "eval_logits/rejected": -1.0703125, "eval_logps/chosen": -67.5, "eval_logps/rejected": -61.25, "eval_loss": 0.6068750023841858, "eval_rewards/accuracies": 0.375, "eval_rewards/chosen": -0.2021484375, "eval_rewards/margins": 0.34375, "eval_rewards/rejected": -0.546875, "eval_runtime": 14.9623, "eval_samples_per_second": 13.367, "eval_steps_per_second": 0.468, "step": 189 }, { "epoch": 3.0, "step": 189, "total_flos": 0.0, "train_loss": 0.601676173941799, "train_runtime": 1666.5095, "train_samples_per_second": 3.6, "train_steps_per_second": 0.113 } ], "logging_steps": 10, "max_steps": 189, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }