|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 9.971509971509972, |
|
"eval_steps": 100, |
|
"global_step": 10500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 0.3346759080886841, |
|
"eval_runtime": 218.6538, |
|
"eval_samples_per_second": 103.456, |
|
"eval_steps_per_second": 6.467, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_loss": 0.31195777654647827, |
|
"eval_runtime": 218.3816, |
|
"eval_samples_per_second": 103.585, |
|
"eval_steps_per_second": 6.475, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 0.31596091389656067, |
|
"eval_runtime": 220.5173, |
|
"eval_samples_per_second": 102.582, |
|
"eval_steps_per_second": 6.412, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_loss": 0.3161667287349701, |
|
"eval_runtime": 218.5361, |
|
"eval_samples_per_second": 103.512, |
|
"eval_steps_per_second": 6.47, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 4.7625830959164296e-05, |
|
"loss": 0.1792, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 0.3097754120826721, |
|
"eval_runtime": 219.0553, |
|
"eval_samples_per_second": 103.266, |
|
"eval_steps_per_second": 6.455, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 0.30482736229896545, |
|
"eval_runtime": 218.842, |
|
"eval_samples_per_second": 103.367, |
|
"eval_steps_per_second": 6.461, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 0.3050496578216553, |
|
"eval_runtime": 219.6074, |
|
"eval_samples_per_second": 103.007, |
|
"eval_steps_per_second": 6.439, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_loss": 0.30001184344291687, |
|
"eval_runtime": 219.2072, |
|
"eval_samples_per_second": 103.195, |
|
"eval_steps_per_second": 6.451, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_loss": 0.3052184283733368, |
|
"eval_runtime": 219.1722, |
|
"eval_samples_per_second": 103.211, |
|
"eval_steps_per_second": 6.452, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.5251661918328584e-05, |
|
"loss": 0.1198, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 0.3005639314651489, |
|
"eval_runtime": 218.7588, |
|
"eval_samples_per_second": 103.406, |
|
"eval_steps_per_second": 6.464, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 0.29650744795799255, |
|
"eval_runtime": 218.5819, |
|
"eval_samples_per_second": 103.49, |
|
"eval_steps_per_second": 6.469, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_loss": 0.2948579788208008, |
|
"eval_runtime": 219.004, |
|
"eval_samples_per_second": 103.29, |
|
"eval_steps_per_second": 6.457, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"eval_loss": 0.28149962425231934, |
|
"eval_runtime": 218.3331, |
|
"eval_samples_per_second": 103.608, |
|
"eval_steps_per_second": 6.476, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_loss": 0.28821900486946106, |
|
"eval_runtime": 218.9147, |
|
"eval_samples_per_second": 103.333, |
|
"eval_steps_per_second": 6.459, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 4.287749287749288e-05, |
|
"loss": 0.1092, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"eval_loss": 0.28900569677352905, |
|
"eval_runtime": 218.7775, |
|
"eval_samples_per_second": 103.397, |
|
"eval_steps_per_second": 6.463, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 0.2834137976169586, |
|
"eval_runtime": 218.2122, |
|
"eval_samples_per_second": 103.665, |
|
"eval_steps_per_second": 6.48, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"eval_loss": 0.2864611744880676, |
|
"eval_runtime": 220.2143, |
|
"eval_samples_per_second": 102.723, |
|
"eval_steps_per_second": 6.421, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 0.28005194664001465, |
|
"eval_runtime": 218.8438, |
|
"eval_samples_per_second": 103.366, |
|
"eval_steps_per_second": 6.461, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 0.28052985668182373, |
|
"eval_runtime": 220.1784, |
|
"eval_samples_per_second": 102.739, |
|
"eval_steps_per_second": 6.422, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 4.050332383665717e-05, |
|
"loss": 0.099, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_loss": 0.2817462384700775, |
|
"eval_runtime": 219.1612, |
|
"eval_samples_per_second": 103.216, |
|
"eval_steps_per_second": 6.452, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 0.2855830788612366, |
|
"eval_runtime": 218.4225, |
|
"eval_samples_per_second": 103.565, |
|
"eval_steps_per_second": 6.474, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 0.2786203622817993, |
|
"eval_runtime": 220.2549, |
|
"eval_samples_per_second": 102.704, |
|
"eval_steps_per_second": 6.42, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"eval_loss": 0.282156765460968, |
|
"eval_runtime": 218.8776, |
|
"eval_samples_per_second": 103.35, |
|
"eval_steps_per_second": 6.46, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"eval_loss": 0.2802504599094391, |
|
"eval_runtime": 218.6184, |
|
"eval_samples_per_second": 103.473, |
|
"eval_steps_per_second": 6.468, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 3.8129154795821466e-05, |
|
"loss": 0.094, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_loss": 0.28312984108924866, |
|
"eval_runtime": 219.677, |
|
"eval_samples_per_second": 102.974, |
|
"eval_steps_per_second": 6.437, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 0.2841149866580963, |
|
"eval_runtime": 219.3064, |
|
"eval_samples_per_second": 103.148, |
|
"eval_steps_per_second": 6.448, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 0.2737499177455902, |
|
"eval_runtime": 218.5172, |
|
"eval_samples_per_second": 103.52, |
|
"eval_steps_per_second": 6.471, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 0.27663424611091614, |
|
"eval_runtime": 219.4686, |
|
"eval_samples_per_second": 103.072, |
|
"eval_steps_per_second": 6.443, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"eval_loss": 0.27256855368614197, |
|
"eval_runtime": 218.7767, |
|
"eval_samples_per_second": 103.398, |
|
"eval_steps_per_second": 6.463, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 3.575498575498576e-05, |
|
"loss": 0.0891, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"eval_loss": 0.27302590012550354, |
|
"eval_runtime": 218.602, |
|
"eval_samples_per_second": 103.48, |
|
"eval_steps_per_second": 6.468, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"eval_loss": 0.27365198731422424, |
|
"eval_runtime": 219.6229, |
|
"eval_samples_per_second": 102.999, |
|
"eval_steps_per_second": 6.438, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_loss": 0.26861417293548584, |
|
"eval_runtime": 219.0414, |
|
"eval_samples_per_second": 103.273, |
|
"eval_steps_per_second": 6.455, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_loss": 0.2664526700973511, |
|
"eval_runtime": 218.6581, |
|
"eval_samples_per_second": 103.454, |
|
"eval_steps_per_second": 6.467, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 0.27480828762054443, |
|
"eval_runtime": 220.5641, |
|
"eval_samples_per_second": 102.56, |
|
"eval_steps_per_second": 6.411, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 3.338081671415005e-05, |
|
"loss": 0.0862, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"eval_loss": 0.26794129610061646, |
|
"eval_runtime": 218.5638, |
|
"eval_samples_per_second": 103.498, |
|
"eval_steps_per_second": 6.47, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_loss": 0.2703064978122711, |
|
"eval_runtime": 218.4977, |
|
"eval_samples_per_second": 103.53, |
|
"eval_steps_per_second": 6.471, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_loss": 0.2635132670402527, |
|
"eval_runtime": 219.2033, |
|
"eval_samples_per_second": 103.196, |
|
"eval_steps_per_second": 6.451, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"eval_loss": 0.27066901326179504, |
|
"eval_runtime": 219.2382, |
|
"eval_samples_per_second": 103.18, |
|
"eval_steps_per_second": 6.45, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"eval_loss": 0.26447921991348267, |
|
"eval_runtime": 219.2131, |
|
"eval_samples_per_second": 103.192, |
|
"eval_steps_per_second": 6.45, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"learning_rate": 3.100664767331434e-05, |
|
"loss": 0.0838, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.8, |
|
"eval_loss": 0.2692434787750244, |
|
"eval_runtime": 219.4395, |
|
"eval_samples_per_second": 103.085, |
|
"eval_steps_per_second": 6.444, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_loss": 0.2642222046852112, |
|
"eval_runtime": 219.413, |
|
"eval_samples_per_second": 103.098, |
|
"eval_steps_per_second": 6.444, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 3.99, |
|
"eval_loss": 0.2643529176712036, |
|
"eval_runtime": 219.2041, |
|
"eval_samples_per_second": 103.196, |
|
"eval_steps_per_second": 6.451, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_loss": 0.25718453526496887, |
|
"eval_runtime": 219.1679, |
|
"eval_samples_per_second": 103.213, |
|
"eval_steps_per_second": 6.452, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_loss": 0.26762890815734863, |
|
"eval_runtime": 218.6308, |
|
"eval_samples_per_second": 103.467, |
|
"eval_steps_per_second": 6.468, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"learning_rate": 2.863247863247863e-05, |
|
"loss": 0.0761, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.27, |
|
"eval_loss": 0.26568803191185, |
|
"eval_runtime": 220.7096, |
|
"eval_samples_per_second": 102.492, |
|
"eval_steps_per_second": 6.407, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"eval_loss": 0.26290062069892883, |
|
"eval_runtime": 219.3895, |
|
"eval_samples_per_second": 103.109, |
|
"eval_steps_per_second": 6.445, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 4.46, |
|
"eval_loss": 0.26172617077827454, |
|
"eval_runtime": 219.4776, |
|
"eval_samples_per_second": 103.067, |
|
"eval_steps_per_second": 6.443, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 4.56, |
|
"eval_loss": 0.26161935925483704, |
|
"eval_runtime": 219.3677, |
|
"eval_samples_per_second": 103.119, |
|
"eval_steps_per_second": 6.446, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 0.2620932459831238, |
|
"eval_runtime": 218.6157, |
|
"eval_samples_per_second": 103.474, |
|
"eval_steps_per_second": 6.468, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 2.6258309591642926e-05, |
|
"loss": 0.0741, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_loss": 0.26253631711006165, |
|
"eval_runtime": 218.59, |
|
"eval_samples_per_second": 103.486, |
|
"eval_steps_per_second": 6.469, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"eval_loss": 0.25699007511138916, |
|
"eval_runtime": 219.4178, |
|
"eval_samples_per_second": 103.096, |
|
"eval_steps_per_second": 6.444, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_loss": 0.2583966851234436, |
|
"eval_runtime": 218.5589, |
|
"eval_samples_per_second": 103.501, |
|
"eval_steps_per_second": 6.47, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"eval_loss": 0.25885534286499023, |
|
"eval_runtime": 219.0916, |
|
"eval_samples_per_second": 103.249, |
|
"eval_steps_per_second": 6.454, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_loss": 0.25685915350914, |
|
"eval_runtime": 219.2049, |
|
"eval_samples_per_second": 103.196, |
|
"eval_steps_per_second": 6.451, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"learning_rate": 2.388414055080722e-05, |
|
"loss": 0.0769, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 0.2619025707244873, |
|
"eval_runtime": 219.3928, |
|
"eval_samples_per_second": 103.107, |
|
"eval_steps_per_second": 6.445, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"eval_loss": 0.25433388352394104, |
|
"eval_runtime": 219.2766, |
|
"eval_samples_per_second": 103.162, |
|
"eval_steps_per_second": 6.448, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"eval_loss": 0.25473591685295105, |
|
"eval_runtime": 219.1683, |
|
"eval_samples_per_second": 103.213, |
|
"eval_steps_per_second": 6.452, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"eval_loss": 0.2583990693092346, |
|
"eval_runtime": 218.8181, |
|
"eval_samples_per_second": 103.378, |
|
"eval_steps_per_second": 6.462, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_loss": 0.2513364255428314, |
|
"eval_runtime": 219.6254, |
|
"eval_samples_per_second": 102.998, |
|
"eval_steps_per_second": 6.438, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 2.150997150997151e-05, |
|
"loss": 0.0701, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"eval_loss": 0.25798743963241577, |
|
"eval_runtime": 218.5917, |
|
"eval_samples_per_second": 103.485, |
|
"eval_steps_per_second": 6.469, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_loss": 0.252897173166275, |
|
"eval_runtime": 218.8327, |
|
"eval_samples_per_second": 103.371, |
|
"eval_steps_per_second": 6.462, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"eval_loss": 0.25619062781333923, |
|
"eval_runtime": 218.7633, |
|
"eval_samples_per_second": 103.404, |
|
"eval_steps_per_second": 6.464, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 5.98, |
|
"eval_loss": 0.25197675824165344, |
|
"eval_runtime": 218.5961, |
|
"eval_samples_per_second": 103.483, |
|
"eval_steps_per_second": 6.469, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"eval_loss": 0.2581734359264374, |
|
"eval_runtime": 219.6175, |
|
"eval_samples_per_second": 103.002, |
|
"eval_steps_per_second": 6.438, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"learning_rate": 1.91358024691358e-05, |
|
"loss": 0.0684, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 6.17, |
|
"eval_loss": 0.2550990581512451, |
|
"eval_runtime": 218.6851, |
|
"eval_samples_per_second": 103.441, |
|
"eval_steps_per_second": 6.466, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_loss": 0.2555626332759857, |
|
"eval_runtime": 219.3776, |
|
"eval_samples_per_second": 103.114, |
|
"eval_steps_per_second": 6.446, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"eval_loss": 0.25543132424354553, |
|
"eval_runtime": 218.6367, |
|
"eval_samples_per_second": 103.464, |
|
"eval_steps_per_second": 6.467, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 6.46, |
|
"eval_loss": 0.25568485260009766, |
|
"eval_runtime": 219.9462, |
|
"eval_samples_per_second": 102.848, |
|
"eval_steps_per_second": 6.429, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"eval_loss": 0.25582244992256165, |
|
"eval_runtime": 218.6889, |
|
"eval_samples_per_second": 103.439, |
|
"eval_steps_per_second": 6.466, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"learning_rate": 1.6761633428300098e-05, |
|
"loss": 0.0662, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"eval_loss": 0.25293371081352234, |
|
"eval_runtime": 219.8419, |
|
"eval_samples_per_second": 102.897, |
|
"eval_steps_per_second": 6.432, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_loss": 0.249311164021492, |
|
"eval_runtime": 219.1307, |
|
"eval_samples_per_second": 103.231, |
|
"eval_steps_per_second": 6.453, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"eval_loss": 0.25428083539009094, |
|
"eval_runtime": 218.6179, |
|
"eval_samples_per_second": 103.473, |
|
"eval_steps_per_second": 6.468, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"eval_loss": 0.25905051827430725, |
|
"eval_runtime": 219.7362, |
|
"eval_samples_per_second": 102.946, |
|
"eval_steps_per_second": 6.435, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 7.03, |
|
"eval_loss": 0.25161299109458923, |
|
"eval_runtime": 218.5372, |
|
"eval_samples_per_second": 103.511, |
|
"eval_steps_per_second": 6.47, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"learning_rate": 1.4387464387464389e-05, |
|
"loss": 0.0659, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 7.12, |
|
"eval_loss": 0.2567010223865509, |
|
"eval_runtime": 219.8696, |
|
"eval_samples_per_second": 102.884, |
|
"eval_steps_per_second": 6.431, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 7.22, |
|
"eval_loss": 0.2568127512931824, |
|
"eval_runtime": 218.4916, |
|
"eval_samples_per_second": 103.533, |
|
"eval_steps_per_second": 6.472, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"eval_loss": 0.24921847879886627, |
|
"eval_runtime": 219.9605, |
|
"eval_samples_per_second": 102.841, |
|
"eval_steps_per_second": 6.428, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_loss": 0.24751408398151398, |
|
"eval_runtime": 218.5269, |
|
"eval_samples_per_second": 103.516, |
|
"eval_steps_per_second": 6.471, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_loss": 0.24565500020980835, |
|
"eval_runtime": 219.112, |
|
"eval_samples_per_second": 103.239, |
|
"eval_steps_per_second": 6.453, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"learning_rate": 1.2013295346628681e-05, |
|
"loss": 0.0641, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"eval_loss": 0.2555699646472931, |
|
"eval_runtime": 218.4879, |
|
"eval_samples_per_second": 103.534, |
|
"eval_steps_per_second": 6.472, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 7.69, |
|
"eval_loss": 0.25907498598098755, |
|
"eval_runtime": 218.8796, |
|
"eval_samples_per_second": 103.349, |
|
"eval_steps_per_second": 6.46, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"eval_loss": 0.2498636245727539, |
|
"eval_runtime": 218.2333, |
|
"eval_samples_per_second": 103.655, |
|
"eval_steps_per_second": 6.479, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"eval_loss": 0.24620996415615082, |
|
"eval_runtime": 218.4049, |
|
"eval_samples_per_second": 103.574, |
|
"eval_steps_per_second": 6.474, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"eval_loss": 0.2514709234237671, |
|
"eval_runtime": 218.5279, |
|
"eval_samples_per_second": 103.515, |
|
"eval_steps_per_second": 6.471, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"learning_rate": 9.639126305792973e-06, |
|
"loss": 0.0614, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 8.07, |
|
"eval_loss": 0.24893517792224884, |
|
"eval_runtime": 218.4312, |
|
"eval_samples_per_second": 103.561, |
|
"eval_steps_per_second": 6.473, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"eval_loss": 0.24635082483291626, |
|
"eval_runtime": 218.6536, |
|
"eval_samples_per_second": 103.456, |
|
"eval_steps_per_second": 6.467, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"eval_loss": 0.24810662865638733, |
|
"eval_runtime": 218.4312, |
|
"eval_samples_per_second": 103.561, |
|
"eval_steps_per_second": 6.473, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"eval_loss": 0.24930644035339355, |
|
"eval_runtime": 218.7482, |
|
"eval_samples_per_second": 103.411, |
|
"eval_steps_per_second": 6.464, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"eval_loss": 0.24013535678386688, |
|
"eval_runtime": 218.398, |
|
"eval_samples_per_second": 103.577, |
|
"eval_steps_per_second": 6.474, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 7.264957264957266e-06, |
|
"loss": 0.0609, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"eval_loss": 0.2461770623922348, |
|
"eval_runtime": 218.9012, |
|
"eval_samples_per_second": 103.339, |
|
"eval_steps_per_second": 6.46, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"eval_loss": 0.24963241815567017, |
|
"eval_runtime": 218.2697, |
|
"eval_samples_per_second": 103.638, |
|
"eval_steps_per_second": 6.478, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 8.74, |
|
"eval_loss": 0.24689340591430664, |
|
"eval_runtime": 218.5291, |
|
"eval_samples_per_second": 103.515, |
|
"eval_steps_per_second": 6.471, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"eval_loss": 0.2498210072517395, |
|
"eval_runtime": 218.2515, |
|
"eval_samples_per_second": 103.646, |
|
"eval_steps_per_second": 6.479, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"eval_loss": 0.24612723290920258, |
|
"eval_runtime": 218.3684, |
|
"eval_samples_per_second": 103.591, |
|
"eval_steps_per_second": 6.475, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"learning_rate": 4.890788224121558e-06, |
|
"loss": 0.0588, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 9.02, |
|
"eval_loss": 0.247446671128273, |
|
"eval_runtime": 218.2559, |
|
"eval_samples_per_second": 103.644, |
|
"eval_steps_per_second": 6.479, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 9.12, |
|
"eval_loss": 0.24682562053203583, |
|
"eval_runtime": 218.332, |
|
"eval_samples_per_second": 103.608, |
|
"eval_steps_per_second": 6.476, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 9.21, |
|
"eval_loss": 0.2363210916519165, |
|
"eval_runtime": 218.2595, |
|
"eval_samples_per_second": 103.643, |
|
"eval_steps_per_second": 6.479, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 9.31, |
|
"eval_loss": 0.24492982029914856, |
|
"eval_runtime": 218.2962, |
|
"eval_samples_per_second": 103.625, |
|
"eval_steps_per_second": 6.477, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"eval_loss": 0.247760608792305, |
|
"eval_runtime": 218.2063, |
|
"eval_samples_per_second": 103.668, |
|
"eval_steps_per_second": 6.48, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 2.51661918328585e-06, |
|
"loss": 0.0604, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"eval_loss": 0.24903397262096405, |
|
"eval_runtime": 218.2843, |
|
"eval_samples_per_second": 103.631, |
|
"eval_steps_per_second": 6.478, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 9.59, |
|
"eval_loss": 0.25066474080085754, |
|
"eval_runtime": 218.2601, |
|
"eval_samples_per_second": 103.642, |
|
"eval_steps_per_second": 6.479, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 9.69, |
|
"eval_loss": 0.24707233905792236, |
|
"eval_runtime": 218.2446, |
|
"eval_samples_per_second": 103.65, |
|
"eval_steps_per_second": 6.479, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"eval_loss": 0.24911357462406158, |
|
"eval_runtime": 218.2948, |
|
"eval_samples_per_second": 103.626, |
|
"eval_steps_per_second": 6.477, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"eval_loss": 0.24460090696811676, |
|
"eval_runtime": 218.238, |
|
"eval_samples_per_second": 103.653, |
|
"eval_steps_per_second": 6.479, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 1.4245014245014247e-07, |
|
"loss": 0.0573, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_loss": 0.24572330713272095, |
|
"eval_runtime": 218.3788, |
|
"eval_samples_per_second": 103.586, |
|
"eval_steps_per_second": 6.475, |
|
"step": 10500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 10530, |
|
"num_train_epochs": 10, |
|
"save_steps": 100, |
|
"total_flos": 1.7676748920639283e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|