|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 5.119745254516602, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5664, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7218045112781954, |
|
"eval_f1": 0.6487146743656654, |
|
"eval_loss": 0.5220863223075867, |
|
"eval_precision": 0.6579765886287625, |
|
"eval_recall": 0.6431623931623932, |
|
"eval_runtime": 5.1231, |
|
"eval_samples_per_second": 77.883, |
|
"eval_steps_per_second": 9.76, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.081735134124756, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.5148, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7243107769423559, |
|
"eval_f1": 0.681013431013431, |
|
"eval_loss": 0.5110514163970947, |
|
"eval_precision": 0.6758417508417509, |
|
"eval_recall": 0.6899436261138389, |
|
"eval_runtime": 5.0632, |
|
"eval_samples_per_second": 78.804, |
|
"eval_steps_per_second": 9.875, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.849787950515747, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.4924, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7443609022556391, |
|
"eval_f1": 0.6799465240641711, |
|
"eval_loss": 0.47909805178642273, |
|
"eval_precision": 0.6884347940173183, |
|
"eval_recall": 0.674122567739589, |
|
"eval_runtime": 5.0526, |
|
"eval_samples_per_second": 78.969, |
|
"eval_steps_per_second": 9.896, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.3732385635375977, |
|
"learning_rate": 4e-05, |
|
"loss": 0.4615, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7644110275689223, |
|
"eval_f1": 0.7098675660622563, |
|
"eval_loss": 0.46511662006378174, |
|
"eval_precision": 0.714821259095223, |
|
"eval_recall": 0.705810147299509, |
|
"eval_runtime": 5.051, |
|
"eval_samples_per_second": 78.994, |
|
"eval_steps_per_second": 9.899, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.7670552730560303, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.4516, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7644110275689223, |
|
"eval_f1": 0.7285635710977304, |
|
"eval_loss": 0.45806992053985596, |
|
"eval_precision": 0.7213601158968073, |
|
"eval_recall": 0.7408165120931078, |
|
"eval_runtime": 5.0731, |
|
"eval_samples_per_second": 78.649, |
|
"eval_steps_per_second": 9.856, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 3.5449442863464355, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.4291, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7894736842105263, |
|
"eval_f1": 0.7421052631578947, |
|
"eval_loss": 0.42945122718811035, |
|
"eval_precision": 0.7461993243243243, |
|
"eval_recall": 0.7385433715220949, |
|
"eval_runtime": 5.05, |
|
"eval_samples_per_second": 79.01, |
|
"eval_steps_per_second": 9.901, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.7750324010849, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.4194, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7994987468671679, |
|
"eval_f1": 0.7593340973520719, |
|
"eval_loss": 0.41914287209510803, |
|
"eval_precision": 0.7580882352941176, |
|
"eval_recall": 0.7606382978723404, |
|
"eval_runtime": 5.057, |
|
"eval_samples_per_second": 78.901, |
|
"eval_steps_per_second": 9.887, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 7.397978782653809, |
|
"learning_rate": 3e-05, |
|
"loss": 0.3994, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8120300751879699, |
|
"eval_f1": 0.7691275064227686, |
|
"eval_loss": 0.4047784209251404, |
|
"eval_precision": 0.7745202893991822, |
|
"eval_recall": 0.7645026368430624, |
|
"eval_runtime": 5.0869, |
|
"eval_samples_per_second": 78.437, |
|
"eval_steps_per_second": 9.829, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 3.891740322113037, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.3919, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8245614035087719, |
|
"eval_f1": 0.7778255059182895, |
|
"eval_loss": 0.3949861526489258, |
|
"eval_precision": 0.7953535353535354, |
|
"eval_recall": 0.7658665211856701, |
|
"eval_runtime": 5.0525, |
|
"eval_samples_per_second": 78.97, |
|
"eval_steps_per_second": 9.896, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 5.2356157302856445, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.3762, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.8270676691729323, |
|
"eval_f1": 0.77771946680446, |
|
"eval_loss": 0.38808026909828186, |
|
"eval_precision": 0.8022497384025113, |
|
"eval_recall": 0.7626386615748317, |
|
"eval_runtime": 5.0638, |
|
"eval_samples_per_second": 78.795, |
|
"eval_steps_per_second": 9.874, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.264948844909668, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.3704, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.8270676691729323, |
|
"eval_f1": 0.7852914798206279, |
|
"eval_loss": 0.380621999502182, |
|
"eval_precision": 0.7949481615042823, |
|
"eval_recall": 0.777641389343517, |
|
"eval_runtime": 5.0487, |
|
"eval_samples_per_second": 79.031, |
|
"eval_steps_per_second": 9.904, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 9.925681114196777, |
|
"learning_rate": 2e-05, |
|
"loss": 0.3642, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8060671053951256, |
|
"eval_loss": 0.37333759665489197, |
|
"eval_precision": 0.8121736395092797, |
|
"eval_recall": 0.8007819603564283, |
|
"eval_runtime": 5.0532, |
|
"eval_samples_per_second": 78.96, |
|
"eval_steps_per_second": 9.895, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 4.583708763122559, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.3614, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.8320802005012531, |
|
"eval_f1": 0.7841623808101279, |
|
"eval_loss": 0.3752892017364502, |
|
"eval_precision": 0.8092082316009765, |
|
"eval_recall": 0.7686852154937261, |
|
"eval_runtime": 5.0801, |
|
"eval_samples_per_second": 78.542, |
|
"eval_steps_per_second": 9.842, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 6.299562454223633, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.3474, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8395989974937343, |
|
"eval_f1": 0.796869033982436, |
|
"eval_loss": 0.3694915473461151, |
|
"eval_precision": 0.8155050505050505, |
|
"eval_recall": 0.7840061829423532, |
|
"eval_runtime": 5.0625, |
|
"eval_samples_per_second": 78.815, |
|
"eval_steps_per_second": 9.877, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 1.3626331090927124, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.3479, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8039617859231819, |
|
"eval_loss": 0.36751317977905273, |
|
"eval_precision": 0.8142185588254234, |
|
"eval_recall": 0.7957810511002, |
|
"eval_runtime": 5.052, |
|
"eval_samples_per_second": 78.979, |
|
"eval_steps_per_second": 9.897, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 5.876506328582764, |
|
"learning_rate": 1e-05, |
|
"loss": 0.3347, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8421052631578947, |
|
"eval_f1": 0.8039617859231819, |
|
"eval_loss": 0.36489802598953247, |
|
"eval_precision": 0.8142185588254234, |
|
"eval_recall": 0.7957810511002, |
|
"eval_runtime": 5.0659, |
|
"eval_samples_per_second": 78.761, |
|
"eval_steps_per_second": 9.87, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 3.045673131942749, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.335, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.794292376880795, |
|
"eval_loss": 0.3653377592563629, |
|
"eval_precision": 0.811438127090301, |
|
"eval_recall": 0.7822331332969631, |
|
"eval_runtime": 5.0657, |
|
"eval_samples_per_second": 78.765, |
|
"eval_steps_per_second": 9.87, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 2.8246428966522217, |
|
"learning_rate": 5e-06, |
|
"loss": 0.3361, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8395989974937343, |
|
"eval_f1": 0.7991821327461466, |
|
"eval_loss": 0.363219290971756, |
|
"eval_precision": 0.8127623983206507, |
|
"eval_recall": 0.7890070921985816, |
|
"eval_runtime": 5.067, |
|
"eval_samples_per_second": 78.745, |
|
"eval_steps_per_second": 9.868, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 4.7039899826049805, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.3343, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.8370927318295739, |
|
"eval_f1": 0.794292376880795, |
|
"eval_loss": 0.36360475420951843, |
|
"eval_precision": 0.811438127090301, |
|
"eval_recall": 0.7822331332969631, |
|
"eval_runtime": 5.0464, |
|
"eval_samples_per_second": 79.066, |
|
"eval_steps_per_second": 9.908, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 6.12240743637085, |
|
"learning_rate": 0.0, |
|
"loss": 0.3347, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8395989974937343, |
|
"eval_f1": 0.7991821327461466, |
|
"eval_loss": 0.3632587492465973, |
|
"eval_precision": 0.8127623983206507, |
|
"eval_recall": 0.7890070921985816, |
|
"eval_runtime": 5.0479, |
|
"eval_samples_per_second": 79.042, |
|
"eval_steps_per_second": 9.905, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 7590599775312000.0, |
|
"train_loss": 0.3984348922479348, |
|
"train_runtime": 1950.6016, |
|
"train_samples_per_second": 37.301, |
|
"train_steps_per_second": 1.251 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 7590599775312000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|