|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9969788519637462, |
|
"eval_steps": 100, |
|
"global_step": 165, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 3.774768176591989, |
|
"learning_rate": 2.941176470588235e-08, |
|
"logits/chosen": 0.48741579055786133, |
|
"logits/rejected": -0.8717803955078125, |
|
"logps/chosen": -311.44610595703125, |
|
"logps/rejected": -1042.2933349609375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 4.065933729000048, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": 0.3187962770462036, |
|
"logits/rejected": -0.46175992488861084, |
|
"logps/chosen": -526.5966796875, |
|
"logps/rejected": -899.632568359375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0003381037386134267, |
|
"rewards/margins": 0.00014021807874087244, |
|
"rewards/rejected": 0.00019788570352829993, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 4.133159908424447, |
|
"learning_rate": 4.994932636402031e-07, |
|
"logits/chosen": 0.22923466563224792, |
|
"logits/rejected": -0.6458711624145508, |
|
"logps/chosen": -566.1712646484375, |
|
"logps/rejected": -926.1541137695312, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.0022346877958625555, |
|
"rewards/margins": 0.0030761375091969967, |
|
"rewards/rejected": -0.0008414499461650848, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.759041431537677, |
|
"learning_rate": 4.905416503522123e-07, |
|
"logits/chosen": 0.2407102882862091, |
|
"logits/rejected": -0.7926596999168396, |
|
"logps/chosen": -523.1210327148438, |
|
"logps/rejected": -1028.3199462890625, |
|
"loss": 0.6855, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.010289192199707031, |
|
"rewards/margins": 0.013627497479319572, |
|
"rewards/rejected": -0.003338304813951254, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 3.918700608724971, |
|
"learning_rate": 4.707922373336523e-07, |
|
"logits/chosen": 0.14743538200855255, |
|
"logits/rejected": -0.7249930500984192, |
|
"logps/chosen": -524.011474609375, |
|
"logps/rejected": -989.4501953125, |
|
"loss": 0.675, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.016446446999907494, |
|
"rewards/margins": 0.051999401301145554, |
|
"rewards/rejected": -0.03555295616388321, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 3.6017852179026626, |
|
"learning_rate": 4.4113156629677313e-07, |
|
"logits/chosen": 0.23459818959236145, |
|
"logits/rejected": -0.6225197911262512, |
|
"logps/chosen": -481.66455078125, |
|
"logps/rejected": -867.3211059570312, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.03439263254404068, |
|
"rewards/margins": 0.06260526925325394, |
|
"rewards/rejected": -0.02821262739598751, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 3.679527248386035, |
|
"learning_rate": 4.0289109058972283e-07, |
|
"logits/chosen": 0.26775047183036804, |
|
"logits/rejected": -0.49902766942977905, |
|
"logps/chosen": -516.3983154296875, |
|
"logps/rejected": -819.7734375, |
|
"loss": 0.6398, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.03639604151248932, |
|
"rewards/margins": 0.1593528836965561, |
|
"rewards/rejected": -0.12295685708522797, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 3.945875727521845, |
|
"learning_rate": 3.577874068920446e-07, |
|
"logits/chosen": 0.26115402579307556, |
|
"logits/rejected": -0.6307616233825684, |
|
"logps/chosen": -534.6641845703125, |
|
"logps/rejected": -911.5435791015625, |
|
"loss": 0.6322, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 0.048932626843452454, |
|
"rewards/margins": 0.24001319706439972, |
|
"rewards/rejected": -0.19108060002326965, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 3.7126637404674536, |
|
"learning_rate": 3.078451980100854e-07, |
|
"logits/chosen": 0.20563717186450958, |
|
"logits/rejected": -0.688762903213501, |
|
"logps/chosen": -493.32684326171875, |
|
"logps/rejected": -957.6318359375, |
|
"loss": 0.6237, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.060481660068035126, |
|
"rewards/margins": 0.21118538081645966, |
|
"rewards/rejected": -0.15070374310016632, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 4.119949298182235, |
|
"learning_rate": 2.553063458334059e-07, |
|
"logits/chosen": 0.3919462263584137, |
|
"logits/rejected": -0.5500736832618713, |
|
"logps/chosen": -510.05712890625, |
|
"logps/rejected": -912.9411010742188, |
|
"loss": 0.6164, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.05354578420519829, |
|
"rewards/margins": 0.2815362215042114, |
|
"rewards/rejected": -0.22799046337604523, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 4.539444195047728, |
|
"learning_rate": 2.0252929432814287e-07, |
|
"logits/chosen": 0.23407666385173798, |
|
"logits/rejected": -0.6277016401290894, |
|
"logps/chosen": -514.2950439453125, |
|
"logps/rejected": -985.7261962890625, |
|
"loss": 0.6065, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 0.046576742082834244, |
|
"rewards/margins": 0.26194584369659424, |
|
"rewards/rejected": -0.2153691053390503, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_logits/chosen": -0.1363597810268402, |
|
"eval_logits/rejected": -0.3805391788482666, |
|
"eval_logps/chosen": -523.0221557617188, |
|
"eval_logps/rejected": -812.6375732421875, |
|
"eval_loss": 0.6296960115432739, |
|
"eval_rewards/accuracies": 0.7678571343421936, |
|
"eval_rewards/chosen": 0.07959667593240738, |
|
"eval_rewards/margins": 0.17506957054138184, |
|
"eval_rewards/rejected": -0.09547291696071625, |
|
"eval_runtime": 22.7695, |
|
"eval_samples_per_second": 9.135, |
|
"eval_steps_per_second": 0.307, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 4.166464578639834, |
|
"learning_rate": 1.5188318011445906e-07, |
|
"logits/chosen": 0.09842907637357712, |
|
"logits/rejected": -0.7154465913772583, |
|
"logps/chosen": -633.3096923828125, |
|
"logps/rejected": -972.07861328125, |
|
"loss": 0.5933, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.04117094725370407, |
|
"rewards/margins": 0.2970955967903137, |
|
"rewards/rejected": -0.25592464208602905, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 4.767777281679362, |
|
"learning_rate": 1.0564148305586295e-07, |
|
"logits/chosen": 0.2290249764919281, |
|
"logits/rejected": -0.5675751566886902, |
|
"logps/chosen": -553.788330078125, |
|
"logps/rejected": -963.9578247070312, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": 0.04246982932090759, |
|
"rewards/margins": 0.5371382832527161, |
|
"rewards/rejected": -0.4946684241294861, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 4.959401739670467, |
|
"learning_rate": 6.587997083462196e-08, |
|
"logits/chosen": 0.1415528953075409, |
|
"logits/rejected": -0.6273466348648071, |
|
"logps/chosen": -579.4324951171875, |
|
"logps/rejected": -927.8792114257812, |
|
"loss": 0.5587, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.02018633857369423, |
|
"rewards/margins": 0.7045117616653442, |
|
"rewards/rejected": -0.6843255162239075, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 4.589724744119317, |
|
"learning_rate": 3.438351873250492e-08, |
|
"logits/chosen": 0.2175011932849884, |
|
"logits/rejected": -0.5643750429153442, |
|
"logps/chosen": -543.2364501953125, |
|
"logps/rejected": -1040.180908203125, |
|
"loss": 0.5653, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 0.03240719065070152, |
|
"rewards/margins": 0.5543904900550842, |
|
"rewards/rejected": -0.52198326587677, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 5.293978243611277, |
|
"learning_rate": 1.256598743236703e-08, |
|
"logits/chosen": 0.2741110026836395, |
|
"logits/rejected": -0.6036696434020996, |
|
"logps/chosen": -437.42901611328125, |
|
"logps/rejected": -982.3721923828125, |
|
"loss": 0.5555, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 0.033161625266075134, |
|
"rewards/margins": 0.5011934638023376, |
|
"rewards/rejected": -0.46803179383277893, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 4.856259961602652, |
|
"learning_rate": 1.406755487774386e-09, |
|
"logits/chosen": 0.14368140697479248, |
|
"logits/rejected": -0.6074076294898987, |
|
"logps/chosen": -525.0721435546875, |
|
"logps/rejected": -952.7180786132812, |
|
"loss": 0.5519, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 0.04387623816728592, |
|
"rewards/margins": 0.42462554574012756, |
|
"rewards/rejected": -0.38074928522109985, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 165, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6189163742643414, |
|
"train_runtime": 2381.9724, |
|
"train_samples_per_second": 4.446, |
|
"train_steps_per_second": 0.069 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 165, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|