|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 10, |
|
"global_step": 81, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0, |
|
"loss": 1.0172, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.1546487678572864e-05, |
|
"loss": 2.3198, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4795, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 5e-05, |
|
"loss": 1.551, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2034, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0527, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9743, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9738, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 5e-05, |
|
"loss": 0.939, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 5e-05, |
|
"loss": 0.955, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.2181755564165483, |
|
"eval_loss": 0.9061336517333984, |
|
"eval_runtime": 44.217, |
|
"eval_samples_per_second": 0.814, |
|
"eval_steps_per_second": 0.045, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9513, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 5e-05, |
|
"loss": 0.94, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9211, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8762, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9013, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8758, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8919, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8589, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8775, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8899, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_accuracy": 0.21962848163933016, |
|
"eval_loss": 0.8734426498413086, |
|
"eval_runtime": 44.8031, |
|
"eval_samples_per_second": 0.804, |
|
"eval_steps_per_second": 0.045, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9011, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9195, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 5e-05, |
|
"loss": 0.815, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8216, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8559, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9071, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6228, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3934, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8846, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 5e-05, |
|
"loss": 0.879, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_accuracy": 0.2173899005553403, |
|
"eval_loss": 0.9090863466262817, |
|
"eval_runtime": 45.3228, |
|
"eval_samples_per_second": 0.794, |
|
"eval_steps_per_second": 0.044, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8277, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6593, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5255, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4102, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5839, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6197, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4026, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3835, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3433, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3295, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_accuracy": 0.21730380128287916, |
|
"eval_loss": 0.9803322553634644, |
|
"eval_runtime": 44.8946, |
|
"eval_samples_per_second": 0.802, |
|
"eval_steps_per_second": 0.045, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3101, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3492, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2991, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3412, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3824, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3694, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3926, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3993, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3678, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3711, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_accuracy": 0.2173576133281674, |
|
"eval_loss": 0.9820255637168884, |
|
"eval_runtime": 44.7843, |
|
"eval_samples_per_second": 0.804, |
|
"eval_steps_per_second": 0.045, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3825, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3914, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3962, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2342, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1572, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4409, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4072, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4306, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 5e-05, |
|
"loss": 0.338, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2927, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_accuracy": 0.2152804683800422, |
|
"eval_loss": 1.0269818305969238, |
|
"eval_runtime": 44.7279, |
|
"eval_samples_per_second": 0.805, |
|
"eval_steps_per_second": 0.045, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3043, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3134, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2565, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2333, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2291, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1783, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2058, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 5e-05, |
|
"loss": 0.208, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1745, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1703, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_accuracy": 0.2131279865685135, |
|
"eval_loss": 1.0966472625732422, |
|
"eval_runtime": 44.8433, |
|
"eval_samples_per_second": 0.803, |
|
"eval_steps_per_second": 0.045, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2235, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2062, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2094, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2123, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2144, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2091, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2116, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2123, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1891, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2011, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_accuracy": 0.2144840501097766, |
|
"eval_loss": 1.1488189697265625, |
|
"eval_runtime": 44.8519, |
|
"eval_samples_per_second": 0.803, |
|
"eval_steps_per_second": 0.045, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1493, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 81, |
|
"total_flos": 10840779079680.0, |
|
"train_loss": 0.5814771251178082, |
|
"train_runtime": 6697.7067, |
|
"train_samples_per_second": 0.377, |
|
"train_steps_per_second": 0.012 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 81, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 10840779079680.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|