|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9999847145752639, |
|
"eval_steps": 500, |
|
"global_step": 40888, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9981412639405207e-05, |
|
"loss": 3.4921, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9956955585991002e-05, |
|
"loss": 2.95, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9932498532576798e-05, |
|
"loss": 2.8209, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.9908041479162593e-05, |
|
"loss": 2.7532, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.988358442574839e-05, |
|
"loss": 2.7287, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9859127372334184e-05, |
|
"loss": 2.6975, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9834670318919976e-05, |
|
"loss": 2.7061, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.981021326550577e-05, |
|
"loss": 2.6844, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.978575621209157e-05, |
|
"loss": 2.6694, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9761299158677366e-05, |
|
"loss": 2.6722, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"eval_accuracy": 0.4950875237178296, |
|
"eval_loss": 2.48111629486084, |
|
"eval_runtime": 33.1501, |
|
"eval_samples_per_second": 73.393, |
|
"eval_steps_per_second": 4.615, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9736842105263158e-05, |
|
"loss": 2.6674, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.9712385051848954e-05, |
|
"loss": 2.6716, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.968792799843475e-05, |
|
"loss": 2.6449, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9663470945020548e-05, |
|
"loss": 2.6395, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.963901389160634e-05, |
|
"loss": 2.6398, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9615045979260422e-05, |
|
"loss": 2.6342, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9590588925846218e-05, |
|
"loss": 2.6433, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.956613187243201e-05, |
|
"loss": 2.619, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9541674819017805e-05, |
|
"loss": 2.6278, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.9517706906671884e-05, |
|
"loss": 2.6243, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_accuracy": 0.49991353398275346, |
|
"eval_loss": 2.445934295654297, |
|
"eval_runtime": 32.8968, |
|
"eval_samples_per_second": 73.959, |
|
"eval_steps_per_second": 4.651, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.949324985325768e-05, |
|
"loss": 2.6264, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.946928194091176e-05, |
|
"loss": 2.6283, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.944531402856584e-05, |
|
"loss": 2.6186, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9420856975151636e-05, |
|
"loss": 2.6125, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.939639992173743e-05, |
|
"loss": 2.606, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9371942868323227e-05, |
|
"loss": 2.6225, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9347974955977306e-05, |
|
"loss": 2.6235, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.9324007043631385e-05, |
|
"loss": 2.6114, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.929954999021718e-05, |
|
"loss": 2.6051, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9275092936802975e-05, |
|
"loss": 2.6051, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"eval_accuracy": 0.5025316445514746, |
|
"eval_loss": 2.4294509887695312, |
|
"eval_runtime": 32.8294, |
|
"eval_samples_per_second": 74.11, |
|
"eval_steps_per_second": 4.66, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9251614165525338e-05, |
|
"loss": 2.5985, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9227157112111133e-05, |
|
"loss": 2.6018, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9202700058696932e-05, |
|
"loss": 2.6135, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9179221287419294e-05, |
|
"loss": 2.5797, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9154764234005086e-05, |
|
"loss": 2.592, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9130307180590885e-05, |
|
"loss": 2.5919, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9106339268244964e-05, |
|
"loss": 2.5968, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.908188221483076e-05, |
|
"loss": 2.6137, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.905938172568969e-05, |
|
"loss": 2.584, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9034924672275487e-05, |
|
"loss": 2.5901, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_accuracy": 0.5037453861331029, |
|
"eval_loss": 2.4193975925445557, |
|
"eval_runtime": 32.9991, |
|
"eval_samples_per_second": 73.729, |
|
"eval_steps_per_second": 4.636, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9010467618861282e-05, |
|
"loss": 2.601, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8986010565447078e-05, |
|
"loss": 2.5843, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8962042653101156e-05, |
|
"loss": 2.58, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.8938074740755235e-05, |
|
"loss": 2.5896, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.891361768734103e-05, |
|
"loss": 2.5862, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8889160633926826e-05, |
|
"loss": 2.5904, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8865192721580905e-05, |
|
"loss": 2.6002, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.88407356681667e-05, |
|
"loss": 2.5939, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8817746037957346e-05, |
|
"loss": 2.597, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8793288984543145e-05, |
|
"loss": 2.5852, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"eval_accuracy": 0.5048827158855367, |
|
"eval_loss": 2.412391185760498, |
|
"eval_runtime": 32.6918, |
|
"eval_samples_per_second": 74.422, |
|
"eval_steps_per_second": 4.68, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8769810213265507e-05, |
|
"loss": 2.5863, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8746331441987872e-05, |
|
"loss": 2.5691, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.8721874388573668e-05, |
|
"loss": 2.5908, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8697417335159463e-05, |
|
"loss": 2.5939, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8672960281745255e-05, |
|
"loss": 2.5759, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.864850322833105e-05, |
|
"loss": 2.5764, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.862453531598513e-05, |
|
"loss": 2.5856, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.860056740363921e-05, |
|
"loss": 2.5867, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8576110350225007e-05, |
|
"loss": 2.5822, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8552142437879086e-05, |
|
"loss": 2.5818, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.5054369027309588, |
|
"eval_loss": 2.407233476638794, |
|
"eval_runtime": 32.7819, |
|
"eval_samples_per_second": 74.218, |
|
"eval_steps_per_second": 4.667, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.8528174525533165e-05, |
|
"loss": 2.5806, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8504206613187247e-05, |
|
"loss": 2.5786, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.847974955977304e-05, |
|
"loss": 2.5866, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8455292506358834e-05, |
|
"loss": 2.578, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.843083545294463e-05, |
|
"loss": 2.5848, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8406378399530425e-05, |
|
"loss": 2.5773, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8382410487184504e-05, |
|
"loss": 2.5732, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.8357953433770303e-05, |
|
"loss": 2.572, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.83334963803561e-05, |
|
"loss": 2.5753, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.830903932694189e-05, |
|
"loss": 2.5801, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.5058640046394045, |
|
"eval_loss": 2.4024484157562256, |
|
"eval_runtime": 32.8872, |
|
"eval_samples_per_second": 73.98, |
|
"eval_steps_per_second": 4.652, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8285560555664256e-05, |
|
"loss": 2.5758, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8261592643318335e-05, |
|
"loss": 2.5774, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.823713558990413e-05, |
|
"loss": 2.5818, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8212678536489926e-05, |
|
"loss": 2.5772, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8189199765212288e-05, |
|
"loss": 2.5673, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8164742711798083e-05, |
|
"loss": 2.5721, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.8140774799452162e-05, |
|
"loss": 2.5678, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.811680688710624e-05, |
|
"loss": 2.5684, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8092349833692036e-05, |
|
"loss": 2.5663, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.8067892780277835e-05, |
|
"loss": 2.5626, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_accuracy": 0.5069546829592774, |
|
"eval_loss": 2.398844003677368, |
|
"eval_runtime": 32.7613, |
|
"eval_samples_per_second": 74.265, |
|
"eval_steps_per_second": 4.67, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.804343572686363e-05, |
|
"loss": 2.5592, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.801946781451771e-05, |
|
"loss": 2.5735, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7995499902171788e-05, |
|
"loss": 2.5712, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.7971042848757584e-05, |
|
"loss": 2.5609, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.794658579534338e-05, |
|
"loss": 2.5638, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7922617882997458e-05, |
|
"loss": 2.5806, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.789913911171982e-05, |
|
"loss": 2.5638, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7874682058305615e-05, |
|
"loss": 2.5546, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7850714145959694e-05, |
|
"loss": 2.5689, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.782625709254549e-05, |
|
"loss": 2.5697, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.5073463941257803, |
|
"eval_loss": 2.395766258239746, |
|
"eval_runtime": 33.0876, |
|
"eval_samples_per_second": 73.532, |
|
"eval_steps_per_second": 4.624, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.780180003913129e-05, |
|
"loss": 2.5672, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7777342985717084e-05, |
|
"loss": 2.5697, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7752885932302876e-05, |
|
"loss": 2.5668, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.7728428878888672e-05, |
|
"loss": 2.5706, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.770446096654275e-05, |
|
"loss": 2.5661, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7680493054196833e-05, |
|
"loss": 2.5561, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7656036000782628e-05, |
|
"loss": 2.556, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.763157894736842e-05, |
|
"loss": 2.5593, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7608100176090786e-05, |
|
"loss": 2.5714, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7584132263744868e-05, |
|
"loss": 2.5532, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_accuracy": 0.5078651902292596, |
|
"eval_loss": 2.3927862644195557, |
|
"eval_runtime": 32.7397, |
|
"eval_samples_per_second": 74.314, |
|
"eval_steps_per_second": 4.673, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.755967521033066e-05, |
|
"loss": 2.5543, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.7535218156916455e-05, |
|
"loss": 2.553, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.751076110350225e-05, |
|
"loss": 2.554, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7486304050088046e-05, |
|
"loss": 2.5722, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7461846996673842e-05, |
|
"loss": 2.5687, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.743787908432792e-05, |
|
"loss": 2.5644, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7413911171982e-05, |
|
"loss": 2.562, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7389454118567798e-05, |
|
"loss": 2.5497, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7364997065153594e-05, |
|
"loss": 2.5639, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.7340540011739386e-05, |
|
"loss": 2.5505, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.5079761884653529, |
|
"eval_loss": 2.3904242515563965, |
|
"eval_runtime": 32.835, |
|
"eval_samples_per_second": 74.098, |
|
"eval_steps_per_second": 4.66, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7316572099393468e-05, |
|
"loss": 2.561, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.729309332811583e-05, |
|
"loss": 2.5537, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7268636274701626e-05, |
|
"loss": 2.5581, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.724417922128742e-05, |
|
"loss": 2.5636, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7219722167873217e-05, |
|
"loss": 2.549, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7196732537663862e-05, |
|
"loss": 2.5434, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.7172764625317944e-05, |
|
"loss": 2.5545, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.714830757190374e-05, |
|
"loss": 2.5686, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.712385051848953e-05, |
|
"loss": 2.5578, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.709939346507533e-05, |
|
"loss": 2.5497, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_accuracy": 0.5085874830989099, |
|
"eval_loss": 2.387216806411743, |
|
"eval_runtime": 32.9586, |
|
"eval_samples_per_second": 73.82, |
|
"eval_steps_per_second": 4.642, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.7074936411661126e-05, |
|
"loss": 2.5619, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.705047935824692e-05, |
|
"loss": 2.5541, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.7027489728037567e-05, |
|
"loss": 2.5574, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.7003032674623362e-05, |
|
"loss": 2.5557, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6978575621209158e-05, |
|
"loss": 2.5529, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.6954118567794953e-05, |
|
"loss": 2.5527, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.692966151438075e-05, |
|
"loss": 2.5603, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.690618274310311e-05, |
|
"loss": 2.5397, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.688172568968891e-05, |
|
"loss": 2.5471, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6857268636274705e-05, |
|
"loss": 2.5636, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.5088537179985249, |
|
"eval_loss": 2.3856565952301025, |
|
"eval_runtime": 32.985, |
|
"eval_samples_per_second": 73.761, |
|
"eval_steps_per_second": 4.638, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6833300723928784e-05, |
|
"loss": 2.5447, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6809821952651146e-05, |
|
"loss": 2.552, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.678536489923694e-05, |
|
"loss": 2.5511, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.6760907845822737e-05, |
|
"loss": 2.5507, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6736939933476816e-05, |
|
"loss": 2.5357, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.671248288006261e-05, |
|
"loss": 2.5449, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.668851496771669e-05, |
|
"loss": 2.5525, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.666405791430249e-05, |
|
"loss": 2.5403, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6640579143024848e-05, |
|
"loss": 2.5628, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.661661123067893e-05, |
|
"loss": 2.5483, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.5091706260059216, |
|
"eval_loss": 2.383516550064087, |
|
"eval_runtime": 32.7354, |
|
"eval_samples_per_second": 74.323, |
|
"eval_steps_per_second": 4.674, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6592154177264725e-05, |
|
"loss": 2.5551, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.656769712385052e-05, |
|
"loss": 2.5489, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.6543240070436316e-05, |
|
"loss": 2.553, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6519761299158678e-05, |
|
"loss": 2.5399, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6496282527881044e-05, |
|
"loss": 2.5468, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.647182547446684e-05, |
|
"loss": 2.5422, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.644736842105263e-05, |
|
"loss": 2.5484, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6423400508706713e-05, |
|
"loss": 2.534, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6399432596360792e-05, |
|
"loss": 2.5363, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6374975542946588e-05, |
|
"loss": 2.5505, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_accuracy": 0.5096669007281646, |
|
"eval_loss": 2.3813159465789795, |
|
"eval_runtime": 33.3592, |
|
"eval_samples_per_second": 72.933, |
|
"eval_steps_per_second": 4.586, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.6350518489532383e-05, |
|
"loss": 2.5522, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6326061436118175e-05, |
|
"loss": 2.5449, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6302093523772257e-05, |
|
"loss": 2.5383, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6277636470358053e-05, |
|
"loss": 2.5492, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.625366855801213e-05, |
|
"loss": 2.547, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.622970064566621e-05, |
|
"loss": 2.543, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.6205243592252006e-05, |
|
"loss": 2.5517, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.61807865388378e-05, |
|
"loss": 2.5545, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.615681862649188e-05, |
|
"loss": 2.5452, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.613285071414596e-05, |
|
"loss": 2.5419, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.5095663588476452, |
|
"eval_loss": 2.3796439170837402, |
|
"eval_runtime": 31.6594, |
|
"eval_samples_per_second": 76.849, |
|
"eval_steps_per_second": 4.833, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.610888280180004e-05, |
|
"loss": 2.5429, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.6084425748385837e-05, |
|
"loss": 2.542, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.6060457836039915e-05, |
|
"loss": 2.5469, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.603697906476228e-05, |
|
"loss": 2.5533, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.6012522011348076e-05, |
|
"loss": 2.5465, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.598806495793387e-05, |
|
"loss": 2.5359, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.5963607904519664e-05, |
|
"loss": 2.5437, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5939639992173743e-05, |
|
"loss": 2.5483, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5915182938759538e-05, |
|
"loss": 2.5304, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5890725885345334e-05, |
|
"loss": 2.5467, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.5099267009474262, |
|
"eval_loss": 2.3786227703094482, |
|
"eval_runtime": 33.2478, |
|
"eval_samples_per_second": 73.178, |
|
"eval_steps_per_second": 4.602, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5866268831931132e-05, |
|
"loss": 2.5326, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5841811778516925e-05, |
|
"loss": 2.5463, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5817843866171007e-05, |
|
"loss": 2.5317, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5793875953825085e-05, |
|
"loss": 2.5485, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.5769908041479164e-05, |
|
"loss": 2.5401, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.574545098806496e-05, |
|
"loss": 2.5376, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5721972216787322e-05, |
|
"loss": 2.5477, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5697515163373117e-05, |
|
"loss": 2.5336, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5673547251027196e-05, |
|
"loss": 2.5269, |
|
"step": 8950 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.564909019761299e-05, |
|
"loss": 2.5419, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_accuracy": 0.51021787023341, |
|
"eval_loss": 2.3769397735595703, |
|
"eval_runtime": 32.7948, |
|
"eval_samples_per_second": 74.189, |
|
"eval_steps_per_second": 4.665, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5624633144198787e-05, |
|
"loss": 2.5284, |
|
"step": 9050 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5600176090784586e-05, |
|
"loss": 2.5469, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.5575719037370378e-05, |
|
"loss": 2.5307, |
|
"step": 9150 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5551261983956173e-05, |
|
"loss": 2.5331, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.552778321267854e-05, |
|
"loss": 2.5359, |
|
"step": 9250 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5503326159264334e-05, |
|
"loss": 2.5388, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5479358246918413e-05, |
|
"loss": 2.5388, |
|
"step": 9350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.545490119350421e-05, |
|
"loss": 2.549, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5430933281158287e-05, |
|
"loss": 2.5469, |
|
"step": 9450 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5406476227744083e-05, |
|
"loss": 2.5269, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_accuracy": 0.5104511273962147, |
|
"eval_loss": 2.3753819465637207, |
|
"eval_runtime": 32.6269, |
|
"eval_samples_per_second": 74.57, |
|
"eval_steps_per_second": 4.689, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5382508315398162e-05, |
|
"loss": 2.5363, |
|
"step": 9550 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.5358051261983957e-05, |
|
"loss": 2.5245, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5334083349638036e-05, |
|
"loss": 2.5369, |
|
"step": 9650 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5310115437292118e-05, |
|
"loss": 2.5294, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5286147524946197e-05, |
|
"loss": 2.5394, |
|
"step": 9750 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5261690471531992e-05, |
|
"loss": 2.5266, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5237233418117788e-05, |
|
"loss": 2.538, |
|
"step": 9850 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5213265505771867e-05, |
|
"loss": 2.5272, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5188808452357662e-05, |
|
"loss": 2.5478, |
|
"step": 9950 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.5164351398943456e-05, |
|
"loss": 2.5315, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_accuracy": 0.5106103857349571, |
|
"eval_loss": 2.37398099899292, |
|
"eval_runtime": 33.5869, |
|
"eval_samples_per_second": 72.439, |
|
"eval_steps_per_second": 4.555, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.5140383486597536e-05, |
|
"loss": 2.537, |
|
"step": 10050 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.51169047153199e-05, |
|
"loss": 2.5316, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.5092447661905696e-05, |
|
"loss": 2.5337, |
|
"step": 10150 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.506799060849149e-05, |
|
"loss": 2.5375, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.5043533555077285e-05, |
|
"loss": 2.5388, |
|
"step": 10250 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.5019076501663082e-05, |
|
"loss": 2.5292, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.4995108589317159e-05, |
|
"loss": 2.5219, |
|
"step": 10350 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.4971140676971238e-05, |
|
"loss": 2.5335, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4946683623557035e-05, |
|
"loss": 2.5389, |
|
"step": 10450 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4922715711211116e-05, |
|
"loss": 2.5442, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.5107744700839645, |
|
"eval_loss": 2.372843027114868, |
|
"eval_runtime": 32.6321, |
|
"eval_samples_per_second": 74.558, |
|
"eval_steps_per_second": 4.689, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.489825865779691e-05, |
|
"loss": 2.5302, |
|
"step": 10550 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.487429074545099e-05, |
|
"loss": 2.5339, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4850811974173354e-05, |
|
"loss": 2.5355, |
|
"step": 10650 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4826354920759147e-05, |
|
"loss": 2.5411, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4801897867344943e-05, |
|
"loss": 2.5354, |
|
"step": 10750 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.4777440813930738e-05, |
|
"loss": 2.5302, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4752983760516536e-05, |
|
"loss": 2.5341, |
|
"step": 10850 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4729015848170614e-05, |
|
"loss": 2.5327, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4705047935824693e-05, |
|
"loss": 2.523, |
|
"step": 10950 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4681569164547057e-05, |
|
"loss": 2.5318, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.5111710072607324, |
|
"eval_loss": 2.3712527751922607, |
|
"eval_runtime": 32.9046, |
|
"eval_samples_per_second": 73.941, |
|
"eval_steps_per_second": 4.65, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4657112111132852e-05, |
|
"loss": 2.5355, |
|
"step": 11050 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4633144198786931e-05, |
|
"loss": 2.5284, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4608687145372727e-05, |
|
"loss": 2.5394, |
|
"step": 11150 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.4584230091958522e-05, |
|
"loss": 2.5381, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.45602621796126e-05, |
|
"loss": 2.5397, |
|
"step": 11250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4536783408334965e-05, |
|
"loss": 2.5412, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.451232635492076e-05, |
|
"loss": 2.5417, |
|
"step": 11350 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4487869301506557e-05, |
|
"loss": 2.5314, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.446341224809235e-05, |
|
"loss": 2.5305, |
|
"step": 11450 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4438955194678146e-05, |
|
"loss": 2.5242, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_accuracy": 0.5112618971207219, |
|
"eval_loss": 2.37015700340271, |
|
"eval_runtime": 32.8429, |
|
"eval_samples_per_second": 74.08, |
|
"eval_steps_per_second": 4.659, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4414987282332227e-05, |
|
"loss": 2.5308, |
|
"step": 11550 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4391019369986306e-05, |
|
"loss": 2.5333, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.4366562316572101e-05, |
|
"loss": 2.532, |
|
"step": 11650 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4342105263157895e-05, |
|
"loss": 2.5302, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.431764820974369e-05, |
|
"loss": 2.526, |
|
"step": 11750 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4293680297397771e-05, |
|
"loss": 2.537, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.426971238505185e-05, |
|
"loss": 2.5316, |
|
"step": 11850 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4245744472705928e-05, |
|
"loss": 2.5305, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4221287419291726e-05, |
|
"loss": 2.517, |
|
"step": 11950 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.4196830365877521e-05, |
|
"loss": 2.5178, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_accuracy": 0.511163768245335, |
|
"eval_loss": 2.3697993755340576, |
|
"eval_runtime": 32.6726, |
|
"eval_samples_per_second": 74.466, |
|
"eval_steps_per_second": 4.683, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.41728624535316e-05, |
|
"loss": 2.5196, |
|
"step": 12050 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4148894541185679e-05, |
|
"loss": 2.5385, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4124437487771474e-05, |
|
"loss": 2.537, |
|
"step": 12150 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4100469575425555e-05, |
|
"loss": 2.5318, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4076012522011348e-05, |
|
"loss": 2.5331, |
|
"step": 12250 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4051555468597144e-05, |
|
"loss": 2.53, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4027098415182941e-05, |
|
"loss": 2.5301, |
|
"step": 12350 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.4003130502837018e-05, |
|
"loss": 2.5358, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.3978673449422815e-05, |
|
"loss": 2.5182, |
|
"step": 12450 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3954705537076894e-05, |
|
"loss": 2.5345, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_accuracy": 0.5114412638355682, |
|
"eval_loss": 2.3686695098876953, |
|
"eval_runtime": 33.0698, |
|
"eval_samples_per_second": 73.572, |
|
"eval_steps_per_second": 4.627, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.393024848366269e-05, |
|
"loss": 2.5227, |
|
"step": 12550 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3905791430248485e-05, |
|
"loss": 2.5324, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.388133437683428e-05, |
|
"loss": 2.5163, |
|
"step": 12650 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3856877323420074e-05, |
|
"loss": 2.5227, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3832909411074157e-05, |
|
"loss": 2.5351, |
|
"step": 12750 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3808452357659949e-05, |
|
"loss": 2.5269, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.3783995304245746e-05, |
|
"loss": 2.5377, |
|
"step": 12850 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3761005674036395e-05, |
|
"loss": 2.5336, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3736548620622188e-05, |
|
"loss": 2.526, |
|
"step": 12950 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3712091567207984e-05, |
|
"loss": 2.531, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.511461372211672, |
|
"eval_loss": 2.367508888244629, |
|
"eval_runtime": 32.6315, |
|
"eval_samples_per_second": 74.56, |
|
"eval_steps_per_second": 4.689, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.368763451379378e-05, |
|
"loss": 2.5183, |
|
"step": 13050 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3664155742516143e-05, |
|
"loss": 2.5289, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3639698689101939e-05, |
|
"loss": 2.5191, |
|
"step": 13150 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3615241635687732e-05, |
|
"loss": 2.5241, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.3590784582273528e-05, |
|
"loss": 2.5237, |
|
"step": 13250 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3566816669927607e-05, |
|
"loss": 2.5301, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3542848757581689e-05, |
|
"loss": 2.5191, |
|
"step": 13350 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3519369986304053e-05, |
|
"loss": 2.5184, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3494912932889848e-05, |
|
"loss": 2.527, |
|
"step": 13450 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3470945020543927e-05, |
|
"loss": 2.5304, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.511769432533583, |
|
"eval_loss": 2.3661422729492188, |
|
"eval_runtime": 32.5737, |
|
"eval_samples_per_second": 74.692, |
|
"eval_steps_per_second": 4.697, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3446487967129722e-05, |
|
"loss": 2.5411, |
|
"step": 13550 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.3422030913715516e-05, |
|
"loss": 2.5291, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.339855214243788e-05, |
|
"loss": 2.5233, |
|
"step": 13650 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3374095089023675e-05, |
|
"loss": 2.5259, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.334963803560947e-05, |
|
"loss": 2.5218, |
|
"step": 13750 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3326159264331835e-05, |
|
"loss": 2.5301, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.330170221091763e-05, |
|
"loss": 2.519, |
|
"step": 13850 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3277245157503424e-05, |
|
"loss": 2.5291, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3253277245157506e-05, |
|
"loss": 2.5241, |
|
"step": 13950 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.32288201917433e-05, |
|
"loss": 2.5264, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.5121217312829225, |
|
"eval_loss": 2.3653178215026855, |
|
"eval_runtime": 32.9474, |
|
"eval_samples_per_second": 73.845, |
|
"eval_steps_per_second": 4.644, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.320485227939738e-05, |
|
"loss": 2.5259, |
|
"step": 14050 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.3180884367051459e-05, |
|
"loss": 2.5211, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.3156427313637254e-05, |
|
"loss": 2.5171, |
|
"step": 14150 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.313197026022305e-05, |
|
"loss": 2.5365, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.3108002347877129e-05, |
|
"loss": 2.5259, |
|
"step": 14250 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.3083545294462924e-05, |
|
"loss": 2.5333, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.3059577382117003e-05, |
|
"loss": 2.5238, |
|
"step": 14350 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.3035609469771082e-05, |
|
"loss": 2.5238, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.3011152416356879e-05, |
|
"loss": 2.5237, |
|
"step": 14450 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.2987673645079241e-05, |
|
"loss": 2.5281, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.5123228150439609, |
|
"eval_loss": 2.3646602630615234, |
|
"eval_runtime": 32.7106, |
|
"eval_samples_per_second": 74.38, |
|
"eval_steps_per_second": 4.677, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.296370573273332e-05, |
|
"loss": 2.5268, |
|
"step": 14550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2939248679319117e-05, |
|
"loss": 2.5184, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2914791625904912e-05, |
|
"loss": 2.5281, |
|
"step": 14650 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2890823713558991e-05, |
|
"loss": 2.5295, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2866366660144787e-05, |
|
"loss": 2.5167, |
|
"step": 14750 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2841909606730582e-05, |
|
"loss": 2.5253, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.281745255331638e-05, |
|
"loss": 2.5242, |
|
"step": 14850 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.2793484640970456e-05, |
|
"loss": 2.5257, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.277000586969282e-05, |
|
"loss": 2.5361, |
|
"step": 14950 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2745548816278617e-05, |
|
"loss": 2.5259, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_accuracy": 0.51232844538927, |
|
"eval_loss": 2.3635854721069336, |
|
"eval_runtime": 32.8056, |
|
"eval_samples_per_second": 74.164, |
|
"eval_steps_per_second": 4.664, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2721580903932696e-05, |
|
"loss": 2.5093, |
|
"step": 15050 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2697123850518492e-05, |
|
"loss": 2.5149, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2672666797104285e-05, |
|
"loss": 2.5249, |
|
"step": 15150 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2648698884758366e-05, |
|
"loss": 2.5316, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.2624241831344161e-05, |
|
"loss": 2.5086, |
|
"step": 15250 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.260027391899824e-05, |
|
"loss": 2.5219, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2575816865584036e-05, |
|
"loss": 2.5162, |
|
"step": 15350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2551848953238114e-05, |
|
"loss": 2.516, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2527391899823912e-05, |
|
"loss": 2.5206, |
|
"step": 15450 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2502934846409707e-05, |
|
"loss": 2.5075, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_accuracy": 0.5121699913855717, |
|
"eval_loss": 2.362924337387085, |
|
"eval_runtime": 32.9662, |
|
"eval_samples_per_second": 73.803, |
|
"eval_steps_per_second": 4.641, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2478966934063786e-05, |
|
"loss": 2.5232, |
|
"step": 15550 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2454999021717865e-05, |
|
"loss": 2.5285, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.243054196830366e-05, |
|
"loss": 2.5247, |
|
"step": 15650 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.2406574055957739e-05, |
|
"loss": 2.5207, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.238260614361182e-05, |
|
"loss": 2.518, |
|
"step": 15750 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2358149090197613e-05, |
|
"loss": 2.5202, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2333692036783409e-05, |
|
"loss": 2.5151, |
|
"step": 15850 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2310213265505772e-05, |
|
"loss": 2.5139, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2285756212091568e-05, |
|
"loss": 2.5141, |
|
"step": 15950 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2261788299745647e-05, |
|
"loss": 2.5147, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"eval_accuracy": 0.5127105045352431, |
|
"eval_loss": 2.36214280128479, |
|
"eval_runtime": 32.6511, |
|
"eval_samples_per_second": 74.515, |
|
"eval_steps_per_second": 4.686, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2237820387399729e-05, |
|
"loss": 2.5304, |
|
"step": 16050 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2213363333985523e-05, |
|
"loss": 2.5098, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.2188906280571318e-05, |
|
"loss": 2.5328, |
|
"step": 16150 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.2165427509293682e-05, |
|
"loss": 2.505, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.214145959694776e-05, |
|
"loss": 2.5194, |
|
"step": 16250 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.2117002543533556e-05, |
|
"loss": 2.5094, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.2092545490119351e-05, |
|
"loss": 2.5258, |
|
"step": 16350 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.2068088436705147e-05, |
|
"loss": 2.5215, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.204363138329094e-05, |
|
"loss": 2.5172, |
|
"step": 16450 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.2019663470945023e-05, |
|
"loss": 2.5137, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.5127627863131131, |
|
"eval_loss": 2.361149787902832, |
|
"eval_runtime": 33.1496, |
|
"eval_samples_per_second": 73.395, |
|
"eval_steps_per_second": 4.615, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.1995206417530815e-05, |
|
"loss": 2.525, |
|
"step": 16550 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1971238505184897e-05, |
|
"loss": 2.5122, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1947759733907261e-05, |
|
"loss": 2.5112, |
|
"step": 16650 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1923302680493056e-05, |
|
"loss": 2.5175, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.189884562707885e-05, |
|
"loss": 2.5067, |
|
"step": 16750 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1874388573664646e-05, |
|
"loss": 2.5203, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1849931520250441e-05, |
|
"loss": 2.52, |
|
"step": 16850 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.182596360790452e-05, |
|
"loss": 2.5236, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.1801995695558599e-05, |
|
"loss": 2.5242, |
|
"step": 16950 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.177802778321268e-05, |
|
"loss": 2.5206, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_accuracy": 0.5128585021833675, |
|
"eval_loss": 2.3603355884552, |
|
"eval_runtime": 32.6199, |
|
"eval_samples_per_second": 74.586, |
|
"eval_steps_per_second": 4.69, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1753570729798476e-05, |
|
"loss": 2.5156, |
|
"step": 17050 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.172911367638427e-05, |
|
"loss": 2.5109, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.170514576403835e-05, |
|
"loss": 2.519, |
|
"step": 17150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1680688710624144e-05, |
|
"loss": 2.537, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.165623165720994e-05, |
|
"loss": 2.5155, |
|
"step": 17250 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1632752885932304e-05, |
|
"loss": 2.5149, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.1608784973586382e-05, |
|
"loss": 2.5142, |
|
"step": 17350 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1584327920172178e-05, |
|
"loss": 2.5056, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1559870866757973e-05, |
|
"loss": 2.524, |
|
"step": 17450 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1536392095480337e-05, |
|
"loss": 2.5153, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.5128440241525727, |
|
"eval_loss": 2.359724998474121, |
|
"eval_runtime": 32.6238, |
|
"eval_samples_per_second": 74.577, |
|
"eval_steps_per_second": 4.69, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1511935042066134e-05, |
|
"loss": 2.5147, |
|
"step": 17550 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1487477988651926e-05, |
|
"loss": 2.5183, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1463020935237724e-05, |
|
"loss": 2.5125, |
|
"step": 17650 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1438563881823519e-05, |
|
"loss": 2.5165, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.1414595969477598e-05, |
|
"loss": 2.525, |
|
"step": 17750 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1390138916063393e-05, |
|
"loss": 2.5231, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1367149285854042e-05, |
|
"loss": 2.5239, |
|
"step": 17850 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1343670514576404e-05, |
|
"loss": 2.5146, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.13192134611622e-05, |
|
"loss": 2.5353, |
|
"step": 17950 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1294756407747995e-05, |
|
"loss": 2.5184, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.5130169561870658, |
|
"eval_loss": 2.358973264694214, |
|
"eval_runtime": 32.9217, |
|
"eval_samples_per_second": 73.903, |
|
"eval_steps_per_second": 4.647, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1270788495402074e-05, |
|
"loss": 2.5251, |
|
"step": 18050 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.124633144198787e-05, |
|
"loss": 2.5143, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.1221874388573667e-05, |
|
"loss": 2.502, |
|
"step": 18150 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1197906476227745e-05, |
|
"loss": 2.5125, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1173938563881826e-05, |
|
"loss": 2.5081, |
|
"step": 18250 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.114948151046762e-05, |
|
"loss": 2.5133, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1125024457053415e-05, |
|
"loss": 2.517, |
|
"step": 18350 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.110056740363921e-05, |
|
"loss": 2.512, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1076110350225006e-05, |
|
"loss": 2.5067, |
|
"step": 18450 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1053120720015653e-05, |
|
"loss": 2.5104, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.5131923012266913, |
|
"eval_loss": 2.35813307762146, |
|
"eval_runtime": 32.6499, |
|
"eval_samples_per_second": 74.518, |
|
"eval_steps_per_second": 4.686, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1028663666601449e-05, |
|
"loss": 2.5125, |
|
"step": 18550 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.1004695754255527e-05, |
|
"loss": 2.5192, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0980727841909608e-05, |
|
"loss": 2.5033, |
|
"step": 18650 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0956270788495402e-05, |
|
"loss": 2.5033, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0932302876149484e-05, |
|
"loss": 2.5134, |
|
"step": 18750 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0908334963803562e-05, |
|
"loss": 2.5148, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0884367051457641e-05, |
|
"loss": 2.5006, |
|
"step": 18850 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0859909998043437e-05, |
|
"loss": 2.5072, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0835452944629232e-05, |
|
"loss": 2.5137, |
|
"step": 18950 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.0810995891215028e-05, |
|
"loss": 2.5085, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_accuracy": 0.5133596029158755, |
|
"eval_loss": 2.3577353954315186, |
|
"eval_runtime": 32.7631, |
|
"eval_samples_per_second": 74.26, |
|
"eval_steps_per_second": 4.67, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0786538837800822e-05, |
|
"loss": 2.5277, |
|
"step": 19050 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0762081784386617e-05, |
|
"loss": 2.5148, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.073860301310898e-05, |
|
"loss": 2.5241, |
|
"step": 19150 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0714145959694778e-05, |
|
"loss": 2.5142, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0690178047348857e-05, |
|
"loss": 2.5231, |
|
"step": 19250 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0666210135002937e-05, |
|
"loss": 2.5105, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0641753081588731e-05, |
|
"loss": 2.5149, |
|
"step": 19350 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.0617785169242811e-05, |
|
"loss": 2.5092, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.059381725689689e-05, |
|
"loss": 2.5284, |
|
"step": 19450 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0569360203482686e-05, |
|
"loss": 2.509, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_accuracy": 0.5135132309093088, |
|
"eval_loss": 2.357166290283203, |
|
"eval_runtime": 32.782, |
|
"eval_samples_per_second": 74.218, |
|
"eval_steps_per_second": 4.667, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.054490315006848e-05, |
|
"loss": 2.5078, |
|
"step": 19550 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0520446096654275e-05, |
|
"loss": 2.5138, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.049598904324007e-05, |
|
"loss": 2.5065, |
|
"step": 19650 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0472510271962434e-05, |
|
"loss": 2.5247, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0448053218548231e-05, |
|
"loss": 2.5161, |
|
"step": 19750 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.0423596165134025e-05, |
|
"loss": 2.5142, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0399628252788106e-05, |
|
"loss": 2.5067, |
|
"step": 19850 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0375171199373901e-05, |
|
"loss": 2.5104, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.035120328702798e-05, |
|
"loss": 2.5177, |
|
"step": 19950 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0326746233613775e-05, |
|
"loss": 2.5143, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.5135220785947945, |
|
"eval_loss": 2.356419563293457, |
|
"eval_runtime": 32.6508, |
|
"eval_samples_per_second": 74.516, |
|
"eval_steps_per_second": 4.686, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0302778321267854e-05, |
|
"loss": 2.5068, |
|
"step": 20050 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.027832126785365e-05, |
|
"loss": 2.5129, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0254842496576013e-05, |
|
"loss": 2.521, |
|
"step": 20150 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.0231363725298377e-05, |
|
"loss": 2.5188, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0206906671884171e-05, |
|
"loss": 2.5141, |
|
"step": 20250 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0182938759538253e-05, |
|
"loss": 2.5039, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.015897084719233e-05, |
|
"loss": 2.5123, |
|
"step": 20350 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0135002934846409e-05, |
|
"loss": 2.5113, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0110545881432206e-05, |
|
"loss": 2.503, |
|
"step": 20450 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0086088828018002e-05, |
|
"loss": 2.5124, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_accuracy": 0.5137456837370693, |
|
"eval_loss": 2.355473279953003, |
|
"eval_runtime": 32.7777, |
|
"eval_samples_per_second": 74.227, |
|
"eval_steps_per_second": 4.668, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0061631774603797e-05, |
|
"loss": 2.5079, |
|
"step": 20550 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0037174721189591e-05, |
|
"loss": 2.5096, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0012717667775386e-05, |
|
"loss": 2.5166, |
|
"step": 20650 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.988260614361182e-06, |
|
"loss": 2.5056, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.963803560946977e-06, |
|
"loss": 2.5089, |
|
"step": 20750 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.939835648601058e-06, |
|
"loss": 2.5149, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.915867736255137e-06, |
|
"loss": 2.5188, |
|
"step": 20850 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.891410682840932e-06, |
|
"loss": 2.5172, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.866953629426728e-06, |
|
"loss": 2.5132, |
|
"step": 20950 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.842496576012523e-06, |
|
"loss": 2.5107, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.513869551333869, |
|
"eval_loss": 2.354633092880249, |
|
"eval_runtime": 31.6609, |
|
"eval_samples_per_second": 76.845, |
|
"eval_steps_per_second": 4.832, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 9.818528663666602e-06, |
|
"loss": 2.5121, |
|
"step": 21050 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.795049892388966e-06, |
|
"loss": 2.4904, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.770592838974761e-06, |
|
"loss": 2.5034, |
|
"step": 21150 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.747114067697125e-06, |
|
"loss": 2.4998, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.72265701428292e-06, |
|
"loss": 2.4988, |
|
"step": 21250 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.698199960868716e-06, |
|
"loss": 2.5118, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.673742907454511e-06, |
|
"loss": 2.5137, |
|
"step": 21350 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.649285854040305e-06, |
|
"loss": 2.5139, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.625807082762669e-06, |
|
"loss": 2.5133, |
|
"step": 21450 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.601350029348466e-06, |
|
"loss": 2.5034, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.513999853611022, |
|
"eval_loss": 2.354314088821411, |
|
"eval_runtime": 32.5811, |
|
"eval_samples_per_second": 74.675, |
|
"eval_steps_per_second": 4.696, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.577382117002543e-06, |
|
"loss": 2.5222, |
|
"step": 21550 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.553414204656623e-06, |
|
"loss": 2.5016, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.528957151242419e-06, |
|
"loss": 2.5188, |
|
"step": 21650 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.504500097828214e-06, |
|
"loss": 2.5084, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.48004304441401e-06, |
|
"loss": 2.5053, |
|
"step": 21750 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.455585990999805e-06, |
|
"loss": 2.506, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.431618078653884e-06, |
|
"loss": 2.5099, |
|
"step": 21850 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.40716102523968e-06, |
|
"loss": 2.502, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.383193112893758e-06, |
|
"loss": 2.5054, |
|
"step": 21950 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.358736059479556e-06, |
|
"loss": 2.4922, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.5139121810912092, |
|
"eval_loss": 2.353778839111328, |
|
"eval_runtime": 32.8198, |
|
"eval_samples_per_second": 74.132, |
|
"eval_steps_per_second": 4.662, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.33427900606535e-06, |
|
"loss": 2.5071, |
|
"step": 22050 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.309821952651145e-06, |
|
"loss": 2.5125, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.285854040305224e-06, |
|
"loss": 2.5201, |
|
"step": 22150 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.261886127959304e-06, |
|
"loss": 2.5112, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.237918215613385e-06, |
|
"loss": 2.5034, |
|
"step": 22250 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.213461162199178e-06, |
|
"loss": 2.4971, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.189004108784974e-06, |
|
"loss": 2.5133, |
|
"step": 22350 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.16454705537077e-06, |
|
"loss": 2.5301, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.14057914302485e-06, |
|
"loss": 2.5093, |
|
"step": 22450 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.116611230678929e-06, |
|
"loss": 2.514, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_accuracy": 0.513995027600757, |
|
"eval_loss": 2.3532376289367676, |
|
"eval_runtime": 32.6183, |
|
"eval_samples_per_second": 74.59, |
|
"eval_steps_per_second": 4.691, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.092154177264724e-06, |
|
"loss": 2.51, |
|
"step": 22550 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.06769712385052e-06, |
|
"loss": 2.4977, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 9.043240070436315e-06, |
|
"loss": 2.5125, |
|
"step": 22650 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 9.019761299158679e-06, |
|
"loss": 2.5002, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.995793386812758e-06, |
|
"loss": 2.5025, |
|
"step": 22750 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.971336333398553e-06, |
|
"loss": 2.5102, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.947368421052632e-06, |
|
"loss": 2.499, |
|
"step": 22850 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.922911367638427e-06, |
|
"loss": 2.5112, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.898454314224223e-06, |
|
"loss": 2.5066, |
|
"step": 22950 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.873997260810018e-06, |
|
"loss": 2.5199, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_accuracy": 0.5140706350949075, |
|
"eval_loss": 2.3526594638824463, |
|
"eval_runtime": 32.7339, |
|
"eval_samples_per_second": 74.327, |
|
"eval_steps_per_second": 4.674, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.849540207395814e-06, |
|
"loss": 2.5014, |
|
"step": 23050 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.825572295049893e-06, |
|
"loss": 2.4882, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.801115241635688e-06, |
|
"loss": 2.4938, |
|
"step": 23150 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.777147329289769e-06, |
|
"loss": 2.5049, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.752690275875562e-06, |
|
"loss": 2.4916, |
|
"step": 23250 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.728233222461358e-06, |
|
"loss": 2.504, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.704265310115438e-06, |
|
"loss": 2.5074, |
|
"step": 23350 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.680297397769517e-06, |
|
"loss": 2.5175, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.655840344355313e-06, |
|
"loss": 2.5207, |
|
"step": 23450 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.631383290941108e-06, |
|
"loss": 2.4926, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_accuracy": 0.5141896766814423, |
|
"eval_loss": 2.3520689010620117, |
|
"eval_runtime": 32.717, |
|
"eval_samples_per_second": 74.365, |
|
"eval_steps_per_second": 4.676, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.606926237526904e-06, |
|
"loss": 2.5076, |
|
"step": 23550 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.582469184112699e-06, |
|
"loss": 2.5023, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.558012130698495e-06, |
|
"loss": 2.4904, |
|
"step": 23650 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.535022500489142e-06, |
|
"loss": 2.5013, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.510565447074937e-06, |
|
"loss": 2.5109, |
|
"step": 23750 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.4870866757973e-06, |
|
"loss": 2.5075, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.462629622383096e-06, |
|
"loss": 2.5093, |
|
"step": 23850 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.438172568968892e-06, |
|
"loss": 2.5033, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.413715515554687e-06, |
|
"loss": 2.4903, |
|
"step": 23950 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.389747603208766e-06, |
|
"loss": 2.5104, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_accuracy": 0.5141647422950735, |
|
"eval_loss": 2.3516719341278076, |
|
"eval_runtime": 32.5398, |
|
"eval_samples_per_second": 74.77, |
|
"eval_steps_per_second": 4.702, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.365290549794561e-06, |
|
"loss": 2.5064, |
|
"step": 24050 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.341322637448642e-06, |
|
"loss": 2.5011, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.317354725102719e-06, |
|
"loss": 2.5028, |
|
"step": 24150 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.292897671688516e-06, |
|
"loss": 2.4954, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.26844061827431e-06, |
|
"loss": 2.514, |
|
"step": 24250 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 8.24447270592839e-06, |
|
"loss": 2.4848, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.220015652514186e-06, |
|
"loss": 2.5135, |
|
"step": 24350 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.196047740168265e-06, |
|
"loss": 2.5149, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.17159068675406e-06, |
|
"loss": 2.5076, |
|
"step": 24450 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.147133633339856e-06, |
|
"loss": 2.5067, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.5143770867467302, |
|
"eval_loss": 2.3510823249816895, |
|
"eval_runtime": 32.9679, |
|
"eval_samples_per_second": 73.799, |
|
"eval_steps_per_second": 4.641, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.122676579925651e-06, |
|
"loss": 2.5042, |
|
"step": 24550 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.098708667579732e-06, |
|
"loss": 2.4911, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.074251614165525e-06, |
|
"loss": 2.5085, |
|
"step": 24650 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 8.049794560751321e-06, |
|
"loss": 2.5185, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 8.025337507337116e-06, |
|
"loss": 2.4945, |
|
"step": 24750 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 8.001369594991197e-06, |
|
"loss": 2.5176, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.97789082371356e-06, |
|
"loss": 2.5036, |
|
"step": 24850 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.953433770299354e-06, |
|
"loss": 2.5015, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.929954999021718e-06, |
|
"loss": 2.5086, |
|
"step": 24950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.905497945607514e-06, |
|
"loss": 2.5055, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.5142451757994889, |
|
"eval_loss": 2.3508098125457764, |
|
"eval_runtime": 32.5406, |
|
"eval_samples_per_second": 74.768, |
|
"eval_steps_per_second": 4.702, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.881040892193309e-06, |
|
"loss": 2.5044, |
|
"step": 25050 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.856583838779105e-06, |
|
"loss": 2.5009, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.8321267853649e-06, |
|
"loss": 2.511, |
|
"step": 25150 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.808648014087264e-06, |
|
"loss": 2.5019, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.78419096067306e-06, |
|
"loss": 2.503, |
|
"step": 25250 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.759733907258855e-06, |
|
"loss": 2.5006, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.73527685384465e-06, |
|
"loss": 2.5043, |
|
"step": 25350 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.711308941498729e-06, |
|
"loss": 2.4988, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.687341029152808e-06, |
|
"loss": 2.5079, |
|
"step": 25450 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.662883975738603e-06, |
|
"loss": 2.5011, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_accuracy": 0.5145990832189167, |
|
"eval_loss": 2.3502306938171387, |
|
"eval_runtime": 32.5132, |
|
"eval_samples_per_second": 74.831, |
|
"eval_steps_per_second": 4.706, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 7.639405204460967e-06, |
|
"loss": 2.5172, |
|
"step": 25550 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.615437292115047e-06, |
|
"loss": 2.502, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.590980238700842e-06, |
|
"loss": 2.5124, |
|
"step": 25650 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.567012326354921e-06, |
|
"loss": 2.4933, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.542555272940717e-06, |
|
"loss": 2.5154, |
|
"step": 25750 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.518098219526512e-06, |
|
"loss": 2.4982, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.4936411661123074e-06, |
|
"loss": 2.4936, |
|
"step": 25850 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.470162394834671e-06, |
|
"loss": 2.5064, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 7.445705341420466e-06, |
|
"loss": 2.5003, |
|
"step": 25950 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.421248288006262e-06, |
|
"loss": 2.4931, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.5147076684498775, |
|
"eval_loss": 2.349609375, |
|
"eval_runtime": 32.9366, |
|
"eval_samples_per_second": 73.869, |
|
"eval_steps_per_second": 4.645, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.396791234592057e-06, |
|
"loss": 2.5008, |
|
"step": 26050 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.372334181177852e-06, |
|
"loss": 2.5034, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.348366268831931e-06, |
|
"loss": 2.5083, |
|
"step": 26150 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.324398356486011e-06, |
|
"loss": 2.5028, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.299941303071807e-06, |
|
"loss": 2.5082, |
|
"step": 26250 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.275973390725886e-06, |
|
"loss": 2.52, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 7.251516337311681e-06, |
|
"loss": 2.5099, |
|
"step": 26350 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.228037566034045e-06, |
|
"loss": 2.5112, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.20358051261984e-06, |
|
"loss": 2.4946, |
|
"step": 26450 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.179123459205636e-06, |
|
"loss": 2.4965, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_accuracy": 0.5147020381045684, |
|
"eval_loss": 2.34912109375, |
|
"eval_runtime": 32.715, |
|
"eval_samples_per_second": 74.37, |
|
"eval_steps_per_second": 4.677, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.154666405791431e-06, |
|
"loss": 2.5126, |
|
"step": 26550 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.130209352377226e-06, |
|
"loss": 2.5, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.10673058109959e-06, |
|
"loss": 2.4946, |
|
"step": 26650 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.082762668753669e-06, |
|
"loss": 2.5005, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 7.058305615339465e-06, |
|
"loss": 2.4972, |
|
"step": 26750 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.03384856192526e-06, |
|
"loss": 2.4976, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 7.009391508511055e-06, |
|
"loss": 2.5075, |
|
"step": 26850 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.985912737233419e-06, |
|
"loss": 2.508, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.961944824887498e-06, |
|
"loss": 2.5023, |
|
"step": 26950 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.937487771473294e-06, |
|
"loss": 2.495, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.5146360826309477, |
|
"eval_loss": 2.348769426345825, |
|
"eval_runtime": 32.8878, |
|
"eval_samples_per_second": 73.979, |
|
"eval_steps_per_second": 4.652, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.9135198591273735e-06, |
|
"loss": 2.5072, |
|
"step": 27050 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.889551946781452e-06, |
|
"loss": 2.509, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 6.865094893367248e-06, |
|
"loss": 2.4979, |
|
"step": 27150 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.841126981021327e-06, |
|
"loss": 2.5134, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.816669927607122e-06, |
|
"loss": 2.4994, |
|
"step": 27250 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.792212874192918e-06, |
|
"loss": 2.5072, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.768244961846997e-06, |
|
"loss": 2.5068, |
|
"step": 27350 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.743787908432793e-06, |
|
"loss": 2.5097, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.719819996086871e-06, |
|
"loss": 2.4966, |
|
"step": 27450 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.695362942672668e-06, |
|
"loss": 2.5051, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.5150479021735546, |
|
"eval_loss": 2.348109245300293, |
|
"eval_runtime": 32.717, |
|
"eval_samples_per_second": 74.365, |
|
"eval_steps_per_second": 4.676, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 6.670905889258462e-06, |
|
"loss": 2.5036, |
|
"step": 27550 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.646937976912542e-06, |
|
"loss": 2.4975, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.6224809234983375e-06, |
|
"loss": 2.485, |
|
"step": 27650 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.598513011152416e-06, |
|
"loss": 2.4965, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.574055957738213e-06, |
|
"loss": 2.4911, |
|
"step": 27750 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.549598904324007e-06, |
|
"loss": 2.5005, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.525141850909803e-06, |
|
"loss": 2.496, |
|
"step": 27850 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.501173938563883e-06, |
|
"loss": 2.4988, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.477206026217961e-06, |
|
"loss": 2.4977, |
|
"step": 27950 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 6.4532381138720416e-06, |
|
"loss": 2.51, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.5149642513289626, |
|
"eval_loss": 2.3477537631988525, |
|
"eval_runtime": 32.5166, |
|
"eval_samples_per_second": 74.823, |
|
"eval_steps_per_second": 4.705, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.428781060457837e-06, |
|
"loss": 2.5063, |
|
"step": 28050 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.404324007043632e-06, |
|
"loss": 2.4999, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.379866953629428e-06, |
|
"loss": 2.4941, |
|
"step": 28150 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.355899041283507e-06, |
|
"loss": 2.5031, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.331441987869302e-06, |
|
"loss": 2.5015, |
|
"step": 28250 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.307474075523381e-06, |
|
"loss": 2.5091, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.283506163177461e-06, |
|
"loss": 2.4905, |
|
"step": 28350 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.259049109763257e-06, |
|
"loss": 2.5008, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.234592056349052e-06, |
|
"loss": 2.4837, |
|
"step": 28450 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.210624144003131e-06, |
|
"loss": 2.4883, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.515170161100266, |
|
"eval_loss": 2.347412586212158, |
|
"eval_runtime": 32.8011, |
|
"eval_samples_per_second": 74.174, |
|
"eval_steps_per_second": 4.664, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.186167090588926e-06, |
|
"loss": 2.5017, |
|
"step": 28550 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.1621991782430055e-06, |
|
"loss": 2.4983, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.137742124828802e-06, |
|
"loss": 2.5059, |
|
"step": 28650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.1132850714145965e-06, |
|
"loss": 2.5119, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.089317159068676e-06, |
|
"loss": 2.5034, |
|
"step": 28750 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 6.065349246722755e-06, |
|
"loss": 2.5012, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 6.0413813343768345e-06, |
|
"loss": 2.5052, |
|
"step": 28850 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 6.016924280962631e-06, |
|
"loss": 2.5008, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.992956368616709e-06, |
|
"loss": 2.4927, |
|
"step": 28950 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.968499315202505e-06, |
|
"loss": 2.4973, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_accuracy": 0.515144422378853, |
|
"eval_loss": 2.3470253944396973, |
|
"eval_runtime": 32.569, |
|
"eval_samples_per_second": 74.703, |
|
"eval_steps_per_second": 4.698, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.9440422617883e-06, |
|
"loss": 2.4978, |
|
"step": 29050 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.919585208374096e-06, |
|
"loss": 2.4971, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.895128154959891e-06, |
|
"loss": 2.497, |
|
"step": 29150 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 5.87116024261397e-06, |
|
"loss": 2.4949, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.847681471336334e-06, |
|
"loss": 2.4992, |
|
"step": 29250 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.823224417922129e-06, |
|
"loss": 2.5073, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.798767364507924e-06, |
|
"loss": 2.5131, |
|
"step": 29350 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.7743103110937205e-06, |
|
"loss": 2.4901, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.749853257679515e-06, |
|
"loss": 2.4982, |
|
"step": 29450 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.725396204265311e-06, |
|
"loss": 2.4939, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_accuracy": 0.5153318324441409, |
|
"eval_loss": 2.346403121948242, |
|
"eval_runtime": 32.5012, |
|
"eval_samples_per_second": 74.859, |
|
"eval_steps_per_second": 4.708, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.7019174329876736e-06, |
|
"loss": 2.4936, |
|
"step": 29550 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 5.67746037957347e-06, |
|
"loss": 2.4974, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.6530033261592646e-06, |
|
"loss": 2.4885, |
|
"step": 29650 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.629524554881628e-06, |
|
"loss": 2.5044, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.605556642535708e-06, |
|
"loss": 2.4987, |
|
"step": 29750 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.5815887301897875e-06, |
|
"loss": 2.5019, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.557131676775582e-06, |
|
"loss": 2.5086, |
|
"step": 29850 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.5326746233613785e-06, |
|
"loss": 2.5045, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.508217569947173e-06, |
|
"loss": 2.4995, |
|
"step": 29950 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.483760516532969e-06, |
|
"loss": 2.4952, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.5152570292850346, |
|
"eval_loss": 2.3460702896118164, |
|
"eval_runtime": 32.6916, |
|
"eval_samples_per_second": 74.423, |
|
"eval_steps_per_second": 4.68, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 5.459303463118763e-06, |
|
"loss": 2.5039, |
|
"step": 30050 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.435335550772844e-06, |
|
"loss": 2.5014, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.411367638426923e-06, |
|
"loss": 2.4918, |
|
"step": 30150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.386910585012718e-06, |
|
"loss": 2.4954, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.3624535315985135e-06, |
|
"loss": 2.4942, |
|
"step": 30250 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.338485619252592e-06, |
|
"loss": 2.4852, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.315006847974956e-06, |
|
"loss": 2.5039, |
|
"step": 30350 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.290549794560752e-06, |
|
"loss": 2.4945, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 5.266581882214831e-06, |
|
"loss": 2.5077, |
|
"step": 30450 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.242613969868911e-06, |
|
"loss": 2.5028, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.5153961792476732, |
|
"eval_loss": 2.345855712890625, |
|
"eval_runtime": 32.4963, |
|
"eval_samples_per_second": 74.87, |
|
"eval_steps_per_second": 4.708, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.218156916454706e-06, |
|
"loss": 2.5017, |
|
"step": 30550 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.193699863040501e-06, |
|
"loss": 2.4878, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.169242809626297e-06, |
|
"loss": 2.4964, |
|
"step": 30650 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.144785756212092e-06, |
|
"loss": 2.494, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.1208178438661714e-06, |
|
"loss": 2.4985, |
|
"step": 30750 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.096849931520251e-06, |
|
"loss": 2.491, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 5.07288201917433e-06, |
|
"loss": 2.4893, |
|
"step": 30850 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.048424965760126e-06, |
|
"loss": 2.4917, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.024457053414205e-06, |
|
"loss": 2.4879, |
|
"step": 30950 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 5.000978282136568e-06, |
|
"loss": 2.4979, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_accuracy": 0.5153632015108629, |
|
"eval_loss": 2.3453683853149414, |
|
"eval_runtime": 32.8384, |
|
"eval_samples_per_second": 74.09, |
|
"eval_steps_per_second": 4.659, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.976521228722364e-06, |
|
"loss": 2.5128, |
|
"step": 31050 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.95206417530816e-06, |
|
"loss": 2.5052, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.927607121893955e-06, |
|
"loss": 2.5018, |
|
"step": 31150 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.90315006847975e-06, |
|
"loss": 2.5049, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 4.878693015065545e-06, |
|
"loss": 2.497, |
|
"step": 31250 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.854725102719625e-06, |
|
"loss": 2.4941, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.8302680493054195e-06, |
|
"loss": 2.5037, |
|
"step": 31350 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.8063001369595e-06, |
|
"loss": 2.4963, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.782332224613579e-06, |
|
"loss": 2.4972, |
|
"step": 31450 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.757875171199374e-06, |
|
"loss": 2.4928, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_accuracy": 0.5154951124581042, |
|
"eval_loss": 2.345027208328247, |
|
"eval_runtime": 32.6929, |
|
"eval_samples_per_second": 74.42, |
|
"eval_steps_per_second": 4.68, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.73341811778517e-06, |
|
"loss": 2.5037, |
|
"step": 31550 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.708961064370965e-06, |
|
"loss": 2.505, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 4.684993152025045e-06, |
|
"loss": 2.5032, |
|
"step": 31650 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.661514380747408e-06, |
|
"loss": 2.5132, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.637057327333203e-06, |
|
"loss": 2.5075, |
|
"step": 31750 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.612600273918999e-06, |
|
"loss": 2.4932, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.588143220504794e-06, |
|
"loss": 2.4801, |
|
"step": 31850 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.564664449227157e-06, |
|
"loss": 2.5061, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.5402073958129526e-06, |
|
"loss": 2.5008, |
|
"step": 31950 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.515750342398748e-06, |
|
"loss": 2.501, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.5156221973950805, |
|
"eval_loss": 2.344646692276001, |
|
"eval_runtime": 32.688, |
|
"eval_samples_per_second": 74.431, |
|
"eval_steps_per_second": 4.681, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.492271571121112e-06, |
|
"loss": 2.4899, |
|
"step": 32050 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.468303658775191e-06, |
|
"loss": 2.4936, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.443846605360986e-06, |
|
"loss": 2.4883, |
|
"step": 32150 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.4193895519467815e-06, |
|
"loss": 2.493, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.394932498532577e-06, |
|
"loss": 2.4964, |
|
"step": 32250 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.3704754451183725e-06, |
|
"loss": 2.5093, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.346018391704168e-06, |
|
"loss": 2.5009, |
|
"step": 32350 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.322050479358248e-06, |
|
"loss": 2.4905, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.298082567012326e-06, |
|
"loss": 2.4964, |
|
"step": 32450 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 4.274114654666406e-06, |
|
"loss": 2.5, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_accuracy": 0.5156302407455221, |
|
"eval_loss": 2.344324827194214, |
|
"eval_runtime": 32.7043, |
|
"eval_samples_per_second": 74.394, |
|
"eval_steps_per_second": 4.678, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.2496576012522015e-06, |
|
"loss": 2.504, |
|
"step": 32550 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.225200547837997e-06, |
|
"loss": 2.4964, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.2007434944237925e-06, |
|
"loss": 2.5011, |
|
"step": 32650 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.177264723146155e-06, |
|
"loss": 2.4929, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.152807669731951e-06, |
|
"loss": 2.4895, |
|
"step": 32750 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.1288397573860305e-06, |
|
"loss": 2.4839, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.10487184504011e-06, |
|
"loss": 2.4969, |
|
"step": 32850 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.080414791625905e-06, |
|
"loss": 2.4876, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.0559577382117e-06, |
|
"loss": 2.4903, |
|
"step": 32950 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.03198982586578e-06, |
|
"loss": 2.4865, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_accuracy": 0.5156238060651689, |
|
"eval_loss": 2.343801259994507, |
|
"eval_runtime": 32.5872, |
|
"eval_samples_per_second": 74.661, |
|
"eval_steps_per_second": 4.695, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.007532772451575e-06, |
|
"loss": 2.5003, |
|
"step": 33050 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.983564860105655e-06, |
|
"loss": 2.4968, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.9591078066914504e-06, |
|
"loss": 2.4738, |
|
"step": 33150 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.935139894345529e-06, |
|
"loss": 2.4972, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.911171981999609e-06, |
|
"loss": 2.4924, |
|
"step": 33250 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.886714928585404e-06, |
|
"loss": 2.4915, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.8622578751712e-06, |
|
"loss": 2.4946, |
|
"step": 33350 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.837800821756995e-06, |
|
"loss": 2.4924, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.8133437683427904e-06, |
|
"loss": 2.4889, |
|
"step": 33450 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.788886714928586e-06, |
|
"loss": 2.4898, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.5156752835079947, |
|
"eval_loss": 2.3433618545532227, |
|
"eval_runtime": 32.4889, |
|
"eval_samples_per_second": 74.887, |
|
"eval_steps_per_second": 4.709, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.764918802582665e-06, |
|
"loss": 2.4994, |
|
"step": 33550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.7414400313050288e-06, |
|
"loss": 2.5027, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.716982977890824e-06, |
|
"loss": 2.4859, |
|
"step": 33650 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.6935042066131876e-06, |
|
"loss": 2.4952, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.669047153198983e-06, |
|
"loss": 2.4965, |
|
"step": 33750 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6450792408530623e-06, |
|
"loss": 2.507, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.6206221874388573e-06, |
|
"loss": 2.4879, |
|
"step": 33850 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.596165134024653e-06, |
|
"loss": 2.4995, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.5717080806104483e-06, |
|
"loss": 2.4978, |
|
"step": 33950 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.5472510271962434e-06, |
|
"loss": 2.4977, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_accuracy": 0.515959213778581, |
|
"eval_loss": 2.3429620265960693, |
|
"eval_runtime": 32.8572, |
|
"eval_samples_per_second": 74.048, |
|
"eval_steps_per_second": 4.657, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.522793973782039e-06, |
|
"loss": 2.4929, |
|
"step": 34050 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 3.4988260614361185e-06, |
|
"loss": 2.4945, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.4748581490901977e-06, |
|
"loss": 2.4988, |
|
"step": 34150 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.450401095675993e-06, |
|
"loss": 2.4937, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.4259440422617886e-06, |
|
"loss": 2.5015, |
|
"step": 34250 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.4019761299158683e-06, |
|
"loss": 2.499, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.3780082175699474e-06, |
|
"loss": 2.4989, |
|
"step": 34350 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.353551164155743e-06, |
|
"loss": 2.5043, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.329583251809822e-06, |
|
"loss": 2.4918, |
|
"step": 34450 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.305126198395617e-06, |
|
"loss": 2.4904, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.5156961962191426, |
|
"eval_loss": 2.3426706790924072, |
|
"eval_runtime": 32.6066, |
|
"eval_samples_per_second": 74.617, |
|
"eval_steps_per_second": 4.692, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.2806691449814127e-06, |
|
"loss": 2.4934, |
|
"step": 34550 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.2562120915672086e-06, |
|
"loss": 2.4905, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.2317550381530037e-06, |
|
"loss": 2.4866, |
|
"step": 34650 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.207297984738799e-06, |
|
"loss": 2.4916, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.1838192134611625e-06, |
|
"loss": 2.4913, |
|
"step": 34750 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.1603404421835258e-06, |
|
"loss": 2.4898, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.1358833887693213e-06, |
|
"loss": 2.4992, |
|
"step": 34850 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.1114263353551168e-06, |
|
"loss": 2.4833, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.086969281940912e-06, |
|
"loss": 2.5011, |
|
"step": 34950 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.0625122285267073e-06, |
|
"loss": 2.4779, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.5159109536759318, |
|
"eval_loss": 2.342430353164673, |
|
"eval_runtime": 32.637, |
|
"eval_samples_per_second": 74.547, |
|
"eval_steps_per_second": 4.688, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.038544316180787e-06, |
|
"loss": 2.4945, |
|
"step": 35050 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.0140872627665824e-06, |
|
"loss": 2.4966, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9901193504206616e-06, |
|
"loss": 2.4912, |
|
"step": 35150 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.965662297006457e-06, |
|
"loss": 2.5, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9416943846605363e-06, |
|
"loss": 2.4955, |
|
"step": 35250 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.9172373312463314e-06, |
|
"loss": 2.4884, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.892780277832127e-06, |
|
"loss": 2.4983, |
|
"step": 35350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.8693015065544906e-06, |
|
"loss": 2.4981, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.84533359420857e-06, |
|
"loss": 2.496, |
|
"step": 35450 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.8208765407943657e-06, |
|
"loss": 2.4792, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.5158892366297396, |
|
"eval_loss": 2.3420228958129883, |
|
"eval_runtime": 32.5968, |
|
"eval_samples_per_second": 74.639, |
|
"eval_steps_per_second": 4.694, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.7964194873801608e-06, |
|
"loss": 2.4965, |
|
"step": 35550 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.77245157503424e-06, |
|
"loss": 2.5008, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.748483662688319e-06, |
|
"loss": 2.4933, |
|
"step": 35650 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.724515750342399e-06, |
|
"loss": 2.4984, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.7000586969281942e-06, |
|
"loss": 2.481, |
|
"step": 35750 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.6756016435139897e-06, |
|
"loss": 2.498, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.6511445900997852e-06, |
|
"loss": 2.4974, |
|
"step": 35850 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.6266875366855803e-06, |
|
"loss": 2.4965, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.6022304832713758e-06, |
|
"loss": 2.4812, |
|
"step": 35950 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.577773429857171e-06, |
|
"loss": 2.4931, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_accuracy": 0.5160203432419368, |
|
"eval_loss": 2.3419036865234375, |
|
"eval_runtime": 32.1614, |
|
"eval_samples_per_second": 75.65, |
|
"eval_steps_per_second": 4.757, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.5533163764429663e-06, |
|
"loss": 2.4803, |
|
"step": 36050 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.52983760516533e-06, |
|
"loss": 2.5021, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.5053805517511256e-06, |
|
"loss": 2.4942, |
|
"step": 36150 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.4814126394052048e-06, |
|
"loss": 2.4948, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.456955585991e-06, |
|
"loss": 2.488, |
|
"step": 36250 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.4329876736450794e-06, |
|
"loss": 2.4923, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.408530620230875e-06, |
|
"loss": 2.4896, |
|
"step": 36350 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.384562707884954e-06, |
|
"loss": 2.5058, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.3605947955390337e-06, |
|
"loss": 2.4995, |
|
"step": 36450 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.336626883193113e-06, |
|
"loss": 2.4997, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.5160388429479523, |
|
"eval_loss": 2.3416337966918945, |
|
"eval_runtime": 32.6706, |
|
"eval_samples_per_second": 74.471, |
|
"eval_steps_per_second": 4.683, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 2.3121698297789084e-06, |
|
"loss": 2.4922, |
|
"step": 36550 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.2891801995695563e-06, |
|
"loss": 2.4925, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.2652122872236355e-06, |
|
"loss": 2.4839, |
|
"step": 36650 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.2407552338094305e-06, |
|
"loss": 2.4843, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.216298180395226e-06, |
|
"loss": 2.4843, |
|
"step": 36750 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.1923302680493056e-06, |
|
"loss": 2.5024, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.1678732146351007e-06, |
|
"loss": 2.4881, |
|
"step": 36850 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.143416161220896e-06, |
|
"loss": 2.504, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.1189591078066917e-06, |
|
"loss": 2.4966, |
|
"step": 36950 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 2.0945020543924868e-06, |
|
"loss": 2.4986, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"eval_accuracy": 0.5161289284728975, |
|
"eval_loss": 2.341364860534668, |
|
"eval_runtime": 32.1728, |
|
"eval_samples_per_second": 75.623, |
|
"eval_steps_per_second": 4.756, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.0705341420465664e-06, |
|
"loss": 2.4914, |
|
"step": 37050 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.046077088632362e-06, |
|
"loss": 2.4945, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 2.021620035218157e-06, |
|
"loss": 2.4977, |
|
"step": 37150 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.9981412639405207e-06, |
|
"loss": 2.4917, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.973684210526316e-06, |
|
"loss": 2.4918, |
|
"step": 37250 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.9497162981803953e-06, |
|
"loss": 2.4991, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.9257483858344745e-06, |
|
"loss": 2.5053, |
|
"step": 37350 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.9017804734885541e-06, |
|
"loss": 2.4826, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.8773234200743496e-06, |
|
"loss": 2.4844, |
|
"step": 37450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.852866366660145e-06, |
|
"loss": 2.4965, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_accuracy": 0.5162141879875779, |
|
"eval_loss": 2.341057538986206, |
|
"eval_runtime": 32.6333, |
|
"eval_samples_per_second": 74.556, |
|
"eval_steps_per_second": 4.688, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.8284093132459402e-06, |
|
"loss": 2.4963, |
|
"step": 37550 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.8039522598317355e-06, |
|
"loss": 2.4976, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.7804734885540992e-06, |
|
"loss": 2.4893, |
|
"step": 37650 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.7560164351398945e-06, |
|
"loss": 2.4976, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.732048522793974e-06, |
|
"loss": 2.4924, |
|
"step": 37750 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.7080806104480533e-06, |
|
"loss": 2.4884, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6836235570338488e-06, |
|
"loss": 2.4838, |
|
"step": 37850 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.659166503619644e-06, |
|
"loss": 2.4991, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6356877323420076e-06, |
|
"loss": 2.4943, |
|
"step": 37950 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.6112306789278029e-06, |
|
"loss": 2.4743, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.5161860362610324, |
|
"eval_loss": 2.3409061431884766, |
|
"eval_runtime": 33.3068, |
|
"eval_samples_per_second": 73.048, |
|
"eval_steps_per_second": 4.594, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.5867736255135981e-06, |
|
"loss": 2.4938, |
|
"step": 38050 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.5632948542359619e-06, |
|
"loss": 2.4884, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.5388378008217572e-06, |
|
"loss": 2.4933, |
|
"step": 38150 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 1.5143807474075524e-06, |
|
"loss": 2.5, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.490901976129916e-06, |
|
"loss": 2.4906, |
|
"step": 38250 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.4664449227157112e-06, |
|
"loss": 2.4821, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.4424770103697909e-06, |
|
"loss": 2.4861, |
|
"step": 38350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.4180199569555861e-06, |
|
"loss": 2.4956, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3935629035413814e-06, |
|
"loss": 2.5016, |
|
"step": 38450 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3691058501271767e-06, |
|
"loss": 2.497, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_accuracy": 0.5163235775535828, |
|
"eval_loss": 2.3405728340148926, |
|
"eval_runtime": 32.196, |
|
"eval_samples_per_second": 75.568, |
|
"eval_steps_per_second": 4.752, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3456270788495404e-06, |
|
"loss": 2.4884, |
|
"step": 38550 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.3211700254353357e-06, |
|
"loss": 2.4936, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.297202113089415e-06, |
|
"loss": 2.4894, |
|
"step": 38650 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.2737233418117786e-06, |
|
"loss": 2.4807, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.249266288397574e-06, |
|
"loss": 2.4942, |
|
"step": 38750 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.2248092349833692e-06, |
|
"loss": 2.4939, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.201330463705733e-06, |
|
"loss": 2.4968, |
|
"step": 38850 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.1768734102915282e-06, |
|
"loss": 2.4934, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.1524163568773235e-06, |
|
"loss": 2.5042, |
|
"step": 38950 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.1284484445314029e-06, |
|
"loss": 2.4942, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_accuracy": 0.5161972969516506, |
|
"eval_loss": 2.3403635025024414, |
|
"eval_runtime": 33.4158, |
|
"eval_samples_per_second": 72.81, |
|
"eval_steps_per_second": 4.579, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.1039913911171984e-06, |
|
"loss": 2.4909, |
|
"step": 39050 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.0800234787712776e-06, |
|
"loss": 2.4877, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.0565447074936413e-06, |
|
"loss": 2.4823, |
|
"step": 39150 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.0320876540794366e-06, |
|
"loss": 2.4907, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.007630600665232e-06, |
|
"loss": 2.4978, |
|
"step": 39250 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.831735472510274e-07, |
|
"loss": 2.4916, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.587164938368226e-07, |
|
"loss": 2.5033, |
|
"step": 39350 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.352377225591862e-07, |
|
"loss": 2.4962, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 9.107806691449815e-07, |
|
"loss": 2.4796, |
|
"step": 39450 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.868127567990608e-07, |
|
"loss": 2.4907, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_accuracy": 0.5162889911466841, |
|
"eval_loss": 2.3401567935943604, |
|
"eval_runtime": 33.2059, |
|
"eval_samples_per_second": 73.27, |
|
"eval_steps_per_second": 4.608, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.628448444531404e-07, |
|
"loss": 2.4889, |
|
"step": 39550 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.388769321072198e-07, |
|
"loss": 2.4849, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 8.144198786930152e-07, |
|
"loss": 2.486, |
|
"step": 39650 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.909411074153786e-07, |
|
"loss": 2.4921, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.66484054001174e-07, |
|
"loss": 2.4956, |
|
"step": 39750 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.420270005869694e-07, |
|
"loss": 2.4897, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 7.175699471727647e-07, |
|
"loss": 2.5071, |
|
"step": 39850 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.9311289375856e-07, |
|
"loss": 2.4944, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.686558403443554e-07, |
|
"loss": 2.4962, |
|
"step": 39950 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.446879279984349e-07, |
|
"loss": 2.4821, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_accuracy": 0.5163211645484503, |
|
"eval_loss": 2.3399698734283447, |
|
"eval_runtime": 32.4683, |
|
"eval_samples_per_second": 74.935, |
|
"eval_steps_per_second": 4.712, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 6.202308745842302e-07, |
|
"loss": 2.4895, |
|
"step": 40050 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.957738211700255e-07, |
|
"loss": 2.4924, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.713167677558208e-07, |
|
"loss": 2.5049, |
|
"step": 40150 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.473488554099002e-07, |
|
"loss": 2.4949, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.228918019956956e-07, |
|
"loss": 2.4972, |
|
"step": 40250 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.98923889649775e-07, |
|
"loss": 2.4936, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.7495597730385447e-07, |
|
"loss": 2.4867, |
|
"step": 40350 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.5098806495793387e-07, |
|
"loss": 2.4935, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.270201526120133e-07, |
|
"loss": 2.4938, |
|
"step": 40450 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.025630991978087e-07, |
|
"loss": 2.4857, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.5163364469142893, |
|
"eval_loss": 2.339829683303833, |
|
"eval_runtime": 33.521, |
|
"eval_samples_per_second": 72.581, |
|
"eval_steps_per_second": 4.564, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7810604578360404e-07, |
|
"loss": 2.496, |
|
"step": 40550 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.536489923693993e-07, |
|
"loss": 2.4848, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.301702210917629e-07, |
|
"loss": 2.487, |
|
"step": 40650 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.062023087458423e-07, |
|
"loss": 2.4902, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.817452553316377e-07, |
|
"loss": 2.478, |
|
"step": 40750 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.57288201917433e-07, |
|
"loss": 2.4857, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 2.3283114850322835e-07, |
|
"loss": 2.472, |
|
"step": 40850 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 40888, |
|
"total_flos": 1.7043212067476708e+20, |
|
"train_loss": 2.5243868586182523, |
|
"train_runtime": 155862.8364, |
|
"train_samples_per_second": 33.579, |
|
"train_steps_per_second": 0.262 |
|
} |
|
], |
|
"logging_steps": 50, |
|
"max_steps": 40888, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 4000, |
|
"total_flos": 1.7043212067476708e+20, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|