|
{ |
|
"best_metric": 0.8676470588235294, |
|
"best_model_checkpoint": "mobilenet_v2_1.0_224-finetuned-papsmear/checkpoint-1797", |
|
"epoch": 59.6078431372549, |
|
"eval_steps": 500, |
|
"global_step": 2280, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.26143790849673204, |
|
"grad_norm": 9.104169845581055, |
|
"learning_rate": 2.1929824561403507e-06, |
|
"loss": 1.8301, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.5228758169934641, |
|
"grad_norm": 8.698704719543457, |
|
"learning_rate": 4.3859649122807014e-06, |
|
"loss": 1.8234, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.7843137254901961, |
|
"grad_norm": 9.768064498901367, |
|
"learning_rate": 6.578947368421053e-06, |
|
"loss": 1.7932, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.9934640522875817, |
|
"eval_accuracy": 0.25, |
|
"eval_loss": 1.7606836557388306, |
|
"eval_runtime": 20.4249, |
|
"eval_samples_per_second": 6.659, |
|
"eval_steps_per_second": 0.832, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 1.0457516339869282, |
|
"grad_norm": 8.835221290588379, |
|
"learning_rate": 8.771929824561403e-06, |
|
"loss": 1.7895, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.3071895424836601, |
|
"grad_norm": 9.015777587890625, |
|
"learning_rate": 1.0964912280701754e-05, |
|
"loss": 1.7622, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.5686274509803921, |
|
"grad_norm": 8.489930152893066, |
|
"learning_rate": 1.3157894736842106e-05, |
|
"loss": 1.7277, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.8300653594771243, |
|
"grad_norm": 8.624068260192871, |
|
"learning_rate": 1.5350877192982457e-05, |
|
"loss": 1.6542, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.9869281045751634, |
|
"eval_accuracy": 0.39705882352941174, |
|
"eval_loss": 1.5735763311386108, |
|
"eval_runtime": 19.6256, |
|
"eval_samples_per_second": 6.93, |
|
"eval_steps_per_second": 0.866, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 2.0915032679738563, |
|
"grad_norm": 8.306758880615234, |
|
"learning_rate": 1.7543859649122806e-05, |
|
"loss": 1.6402, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.3529411764705883, |
|
"grad_norm": 7.969709873199463, |
|
"learning_rate": 1.9736842105263158e-05, |
|
"loss": 1.6017, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.6143790849673203, |
|
"grad_norm": 8.150823593139648, |
|
"learning_rate": 2.1929824561403507e-05, |
|
"loss": 1.5358, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.8758169934640523, |
|
"grad_norm": 7.387766361236572, |
|
"learning_rate": 2.412280701754386e-05, |
|
"loss": 1.4692, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.980392156862745, |
|
"eval_accuracy": 0.36764705882352944, |
|
"eval_loss": 1.4805188179016113, |
|
"eval_runtime": 20.2426, |
|
"eval_samples_per_second": 6.719, |
|
"eval_steps_per_second": 0.84, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 3.1372549019607843, |
|
"grad_norm": 8.2266845703125, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 1.439, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.3986928104575163, |
|
"grad_norm": 8.400202751159668, |
|
"learning_rate": 2.850877192982456e-05, |
|
"loss": 1.399, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.6601307189542482, |
|
"grad_norm": 8.897980690002441, |
|
"learning_rate": 3.0701754385964913e-05, |
|
"loss": 1.3496, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.9215686274509802, |
|
"grad_norm": 8.579508781433105, |
|
"learning_rate": 3.289473684210527e-05, |
|
"loss": 1.2759, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5808823529411765, |
|
"eval_loss": 1.2177482843399048, |
|
"eval_runtime": 19.8987, |
|
"eval_samples_per_second": 6.835, |
|
"eval_steps_per_second": 0.854, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 4.183006535947713, |
|
"grad_norm": 7.850776195526123, |
|
"learning_rate": 3.508771929824561e-05, |
|
"loss": 1.2353, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 7.207574367523193, |
|
"learning_rate": 3.728070175438597e-05, |
|
"loss": 1.1957, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.705882352941177, |
|
"grad_norm": 7.722698211669922, |
|
"learning_rate": 3.9473684210526316e-05, |
|
"loss": 1.1894, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.967320261437909, |
|
"grad_norm": 7.626311779022217, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 1.1521, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.993464052287582, |
|
"eval_accuracy": 0.6470588235294118, |
|
"eval_loss": 1.0726604461669922, |
|
"eval_runtime": 19.9729, |
|
"eval_samples_per_second": 6.809, |
|
"eval_steps_per_second": 0.851, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 5.228758169934641, |
|
"grad_norm": 6.824063301086426, |
|
"learning_rate": 4.3859649122807014e-05, |
|
"loss": 1.1157, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.490196078431373, |
|
"grad_norm": 9.015517234802246, |
|
"learning_rate": 4.605263157894737e-05, |
|
"loss": 1.0953, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 5.751633986928105, |
|
"grad_norm": 7.684938430786133, |
|
"learning_rate": 4.824561403508772e-05, |
|
"loss": 1.078, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.9869281045751634, |
|
"eval_accuracy": 0.6176470588235294, |
|
"eval_loss": 0.9995782375335693, |
|
"eval_runtime": 19.7917, |
|
"eval_samples_per_second": 6.872, |
|
"eval_steps_per_second": 0.859, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 6.0130718954248366, |
|
"grad_norm": 8.750350952148438, |
|
"learning_rate": 4.9951267056530214e-05, |
|
"loss": 1.0635, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 6.2745098039215685, |
|
"grad_norm": 6.832897186279297, |
|
"learning_rate": 4.970760233918128e-05, |
|
"loss": 0.9083, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 6.5359477124183005, |
|
"grad_norm": 6.695949554443359, |
|
"learning_rate": 4.9463937621832366e-05, |
|
"loss": 0.9306, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 6.7973856209150325, |
|
"grad_norm": 7.3760271072387695, |
|
"learning_rate": 4.9220272904483435e-05, |
|
"loss": 1.0235, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 6.980392156862745, |
|
"eval_accuracy": 0.7058823529411765, |
|
"eval_loss": 0.8680174946784973, |
|
"eval_runtime": 19.5178, |
|
"eval_samples_per_second": 6.968, |
|
"eval_steps_per_second": 0.871, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 7.0588235294117645, |
|
"grad_norm": 6.625696659088135, |
|
"learning_rate": 4.8976608187134504e-05, |
|
"loss": 0.885, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 7.3202614379084965, |
|
"grad_norm": 6.442111015319824, |
|
"learning_rate": 4.8732943469785574e-05, |
|
"loss": 0.8222, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 7.5816993464052285, |
|
"grad_norm": 8.278558731079102, |
|
"learning_rate": 4.848927875243665e-05, |
|
"loss": 0.8869, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 7.8431372549019605, |
|
"grad_norm": 7.1332106590271, |
|
"learning_rate": 4.824561403508772e-05, |
|
"loss": 0.9554, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6397058823529411, |
|
"eval_loss": 0.9272549748420715, |
|
"eval_runtime": 19.5476, |
|
"eval_samples_per_second": 6.957, |
|
"eval_steps_per_second": 0.87, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 8.104575163398692, |
|
"grad_norm": 8.287984848022461, |
|
"learning_rate": 4.8001949317738795e-05, |
|
"loss": 0.9148, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 8.366013071895425, |
|
"grad_norm": 7.765283107757568, |
|
"learning_rate": 4.7758284600389865e-05, |
|
"loss": 0.7935, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 8.627450980392156, |
|
"grad_norm": 7.960378646850586, |
|
"learning_rate": 4.751461988304094e-05, |
|
"loss": 0.9253, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 7.798432350158691, |
|
"learning_rate": 4.727095516569201e-05, |
|
"loss": 0.7437, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 8.993464052287582, |
|
"eval_accuracy": 0.7058823529411765, |
|
"eval_loss": 0.7389305233955383, |
|
"eval_runtime": 20.3392, |
|
"eval_samples_per_second": 6.687, |
|
"eval_steps_per_second": 0.836, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 9.15032679738562, |
|
"grad_norm": 7.6827921867370605, |
|
"learning_rate": 4.702729044834308e-05, |
|
"loss": 0.8238, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 9.411764705882353, |
|
"grad_norm": 7.531745433807373, |
|
"learning_rate": 4.678362573099415e-05, |
|
"loss": 0.7379, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 9.673202614379084, |
|
"grad_norm": 7.212475299835205, |
|
"learning_rate": 4.6539961013645225e-05, |
|
"loss": 0.7684, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 9.934640522875817, |
|
"grad_norm": 8.958950996398926, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.7876, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 9.986928104575163, |
|
"eval_accuracy": 0.7426470588235294, |
|
"eval_loss": 0.6773847937583923, |
|
"eval_runtime": 22.3142, |
|
"eval_samples_per_second": 6.095, |
|
"eval_steps_per_second": 0.762, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 10.196078431372548, |
|
"grad_norm": 8.386847496032715, |
|
"learning_rate": 4.605263157894737e-05, |
|
"loss": 0.7287, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 10.457516339869281, |
|
"grad_norm": 8.313080787658691, |
|
"learning_rate": 4.580896686159844e-05, |
|
"loss": 0.6713, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 10.718954248366012, |
|
"grad_norm": 7.874463081359863, |
|
"learning_rate": 4.5565302144249516e-05, |
|
"loss": 0.7802, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 10.980392156862745, |
|
"grad_norm": 7.682034492492676, |
|
"learning_rate": 4.5321637426900585e-05, |
|
"loss": 0.7698, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 10.980392156862745, |
|
"eval_accuracy": 0.7205882352941176, |
|
"eval_loss": 0.6569061875343323, |
|
"eval_runtime": 20.7833, |
|
"eval_samples_per_second": 6.544, |
|
"eval_steps_per_second": 0.818, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 11.241830065359476, |
|
"grad_norm": 8.04808521270752, |
|
"learning_rate": 4.507797270955166e-05, |
|
"loss": 0.7152, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 11.50326797385621, |
|
"grad_norm": 6.100086212158203, |
|
"learning_rate": 4.483430799220273e-05, |
|
"loss": 0.6517, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 11.764705882352942, |
|
"grad_norm": 10.316337585449219, |
|
"learning_rate": 4.4590643274853806e-05, |
|
"loss": 0.7597, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7573529411764706, |
|
"eval_loss": 0.6757553219795227, |
|
"eval_runtime": 21.1029, |
|
"eval_samples_per_second": 6.445, |
|
"eval_steps_per_second": 0.806, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 12.026143790849673, |
|
"grad_norm": 8.15910816192627, |
|
"learning_rate": 4.4346978557504876e-05, |
|
"loss": 0.7, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 12.287581699346406, |
|
"grad_norm": 6.629967212677002, |
|
"learning_rate": 4.4103313840155945e-05, |
|
"loss": 0.6214, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 12.549019607843137, |
|
"grad_norm": 7.6857123374938965, |
|
"learning_rate": 4.3859649122807014e-05, |
|
"loss": 0.6272, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 12.81045751633987, |
|
"grad_norm": 5.317193031311035, |
|
"learning_rate": 4.361598440545809e-05, |
|
"loss": 0.6114, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 12.993464052287582, |
|
"eval_accuracy": 0.7132352941176471, |
|
"eval_loss": 0.8278929591178894, |
|
"eval_runtime": 24.3179, |
|
"eval_samples_per_second": 5.593, |
|
"eval_steps_per_second": 0.699, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 13.071895424836601, |
|
"grad_norm": 7.161328315734863, |
|
"learning_rate": 4.3372319688109166e-05, |
|
"loss": 0.66, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 13.333333333333334, |
|
"grad_norm": 7.760936260223389, |
|
"learning_rate": 4.3128654970760236e-05, |
|
"loss": 0.6246, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 13.594771241830065, |
|
"grad_norm": 7.746845245361328, |
|
"learning_rate": 4.2884990253411305e-05, |
|
"loss": 0.6391, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 13.856209150326798, |
|
"grad_norm": 7.603119373321533, |
|
"learning_rate": 4.264132553606238e-05, |
|
"loss": 0.6847, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 13.986928104575163, |
|
"eval_accuracy": 0.7132352941176471, |
|
"eval_loss": 0.7504714131355286, |
|
"eval_runtime": 24.2998, |
|
"eval_samples_per_second": 5.597, |
|
"eval_steps_per_second": 0.7, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 14.117647058823529, |
|
"grad_norm": 6.282960414886475, |
|
"learning_rate": 4.239766081871345e-05, |
|
"loss": 0.6034, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 14.379084967320262, |
|
"grad_norm": 9.423322677612305, |
|
"learning_rate": 4.215399610136453e-05, |
|
"loss": 0.6312, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 14.640522875816993, |
|
"grad_norm": 7.931453704833984, |
|
"learning_rate": 4.1910331384015596e-05, |
|
"loss": 0.6247, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 14.901960784313726, |
|
"grad_norm": 8.18138313293457, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 0.5902, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 14.980392156862745, |
|
"eval_accuracy": 0.6691176470588235, |
|
"eval_loss": 0.7918919920921326, |
|
"eval_runtime": 22.015, |
|
"eval_samples_per_second": 6.178, |
|
"eval_steps_per_second": 0.772, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 15.163398692810457, |
|
"grad_norm": 9.473512649536133, |
|
"learning_rate": 4.142300194931774e-05, |
|
"loss": 0.6104, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 15.42483660130719, |
|
"grad_norm": 8.635931015014648, |
|
"learning_rate": 4.117933723196881e-05, |
|
"loss": 0.6392, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 15.686274509803921, |
|
"grad_norm": 8.554533004760742, |
|
"learning_rate": 4.093567251461988e-05, |
|
"loss": 0.5461, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 15.947712418300654, |
|
"grad_norm": 6.450700283050537, |
|
"learning_rate": 4.0692007797270956e-05, |
|
"loss": 0.629, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7867647058823529, |
|
"eval_loss": 0.611703097820282, |
|
"eval_runtime": 21.7065, |
|
"eval_samples_per_second": 6.265, |
|
"eval_steps_per_second": 0.783, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 16.209150326797385, |
|
"grad_norm": 7.3682026863098145, |
|
"learning_rate": 4.044834307992203e-05, |
|
"loss": 0.5648, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 16.470588235294116, |
|
"grad_norm": 7.454318523406982, |
|
"learning_rate": 4.02046783625731e-05, |
|
"loss": 0.5916, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 16.73202614379085, |
|
"grad_norm": 7.530863285064697, |
|
"learning_rate": 3.996101364522417e-05, |
|
"loss": 0.5476, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 16.99346405228758, |
|
"grad_norm": 6.975796699523926, |
|
"learning_rate": 3.971734892787525e-05, |
|
"loss": 0.5071, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 16.99346405228758, |
|
"eval_accuracy": 0.7352941176470589, |
|
"eval_loss": 0.6048275828361511, |
|
"eval_runtime": 22.49, |
|
"eval_samples_per_second": 6.047, |
|
"eval_steps_per_second": 0.756, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 17.254901960784313, |
|
"grad_norm": 7.351856708526611, |
|
"learning_rate": 3.9473684210526316e-05, |
|
"loss": 0.5372, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 17.516339869281047, |
|
"grad_norm": 8.070556640625, |
|
"learning_rate": 3.923001949317739e-05, |
|
"loss": 0.5503, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 17.77777777777778, |
|
"grad_norm": 8.659597396850586, |
|
"learning_rate": 3.898635477582846e-05, |
|
"loss": 0.5453, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 17.986928104575163, |
|
"eval_accuracy": 0.7279411764705882, |
|
"eval_loss": 0.8086075782775879, |
|
"eval_runtime": 20.3748, |
|
"eval_samples_per_second": 6.675, |
|
"eval_steps_per_second": 0.834, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 18.03921568627451, |
|
"grad_norm": 8.510287284851074, |
|
"learning_rate": 3.874269005847954e-05, |
|
"loss": 0.5091, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 18.30065359477124, |
|
"grad_norm": 9.453730583190918, |
|
"learning_rate": 3.849902534113061e-05, |
|
"loss": 0.5115, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 18.562091503267975, |
|
"grad_norm": 6.948453903198242, |
|
"learning_rate": 3.8255360623781676e-05, |
|
"loss": 0.5383, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 18.823529411764707, |
|
"grad_norm": 7.508301734924316, |
|
"learning_rate": 3.8011695906432746e-05, |
|
"loss": 0.5071, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 18.980392156862745, |
|
"eval_accuracy": 0.7058823529411765, |
|
"eval_loss": 0.783478856086731, |
|
"eval_runtime": 21.0622, |
|
"eval_samples_per_second": 6.457, |
|
"eval_steps_per_second": 0.807, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 19.084967320261438, |
|
"grad_norm": 8.099154472351074, |
|
"learning_rate": 3.776803118908382e-05, |
|
"loss": 0.4653, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 19.34640522875817, |
|
"grad_norm": 6.376596927642822, |
|
"learning_rate": 3.75243664717349e-05, |
|
"loss": 0.426, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 19.607843137254903, |
|
"grad_norm": 9.521799087524414, |
|
"learning_rate": 3.728070175438597e-05, |
|
"loss": 0.4674, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 19.869281045751634, |
|
"grad_norm": 7.47611141204834, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.5328, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6138941645622253, |
|
"eval_runtime": 20.7772, |
|
"eval_samples_per_second": 6.546, |
|
"eval_steps_per_second": 0.818, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 20.130718954248366, |
|
"grad_norm": 8.478204727172852, |
|
"learning_rate": 3.679337231968811e-05, |
|
"loss": 0.5362, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 20.392156862745097, |
|
"grad_norm": 3.9403858184814453, |
|
"learning_rate": 3.654970760233918e-05, |
|
"loss": 0.4867, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 20.65359477124183, |
|
"grad_norm": 8.257362365722656, |
|
"learning_rate": 3.630604288499025e-05, |
|
"loss": 0.4665, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 20.915032679738562, |
|
"grad_norm": 7.73619270324707, |
|
"learning_rate": 3.606237816764133e-05, |
|
"loss": 0.5053, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 20.99346405228758, |
|
"eval_accuracy": 0.7867647058823529, |
|
"eval_loss": 0.5980984568595886, |
|
"eval_runtime": 20.8979, |
|
"eval_samples_per_second": 6.508, |
|
"eval_steps_per_second": 0.813, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 21.176470588235293, |
|
"grad_norm": 9.795605659484863, |
|
"learning_rate": 3.5818713450292403e-05, |
|
"loss": 0.4638, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 21.437908496732025, |
|
"grad_norm": 7.380087375640869, |
|
"learning_rate": 3.557504873294347e-05, |
|
"loss": 0.453, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 21.69934640522876, |
|
"grad_norm": 9.243858337402344, |
|
"learning_rate": 3.533138401559454e-05, |
|
"loss": 0.4768, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 21.96078431372549, |
|
"grad_norm": 9.421792030334473, |
|
"learning_rate": 3.508771929824561e-05, |
|
"loss": 0.4436, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 21.986928104575163, |
|
"eval_accuracy": 0.8014705882352942, |
|
"eval_loss": 0.5218998193740845, |
|
"eval_runtime": 20.8439, |
|
"eval_samples_per_second": 6.525, |
|
"eval_steps_per_second": 0.816, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 22.22222222222222, |
|
"grad_norm": 9.261748313903809, |
|
"learning_rate": 3.484405458089669e-05, |
|
"loss": 0.4918, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 22.483660130718953, |
|
"grad_norm": 7.44433069229126, |
|
"learning_rate": 3.4600389863547764e-05, |
|
"loss": 0.4141, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 22.745098039215687, |
|
"grad_norm": 7.531349182128906, |
|
"learning_rate": 3.435672514619883e-05, |
|
"loss": 0.5025, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 22.980392156862745, |
|
"eval_accuracy": 0.8088235294117647, |
|
"eval_loss": 0.4959449768066406, |
|
"eval_runtime": 21.8633, |
|
"eval_samples_per_second": 6.22, |
|
"eval_steps_per_second": 0.778, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 23.00653594771242, |
|
"grad_norm": 7.8833794593811035, |
|
"learning_rate": 3.41130604288499e-05, |
|
"loss": 0.4356, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 23.26797385620915, |
|
"grad_norm": 8.349276542663574, |
|
"learning_rate": 3.386939571150098e-05, |
|
"loss": 0.421, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 23.529411764705884, |
|
"grad_norm": 7.9358086585998535, |
|
"learning_rate": 3.362573099415205e-05, |
|
"loss": 0.4414, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 23.790849673202615, |
|
"grad_norm": 7.962532043457031, |
|
"learning_rate": 3.338206627680312e-05, |
|
"loss": 0.4984, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7794117647058824, |
|
"eval_loss": 0.5701327323913574, |
|
"eval_runtime": 20.7703, |
|
"eval_samples_per_second": 6.548, |
|
"eval_steps_per_second": 0.818, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 24.052287581699346, |
|
"grad_norm": 7.35078239440918, |
|
"learning_rate": 3.313840155945419e-05, |
|
"loss": 0.4853, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 24.313725490196077, |
|
"grad_norm": 10.068733215332031, |
|
"learning_rate": 3.289473684210527e-05, |
|
"loss": 0.4273, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 24.575163398692812, |
|
"grad_norm": 8.412373542785645, |
|
"learning_rate": 3.265107212475634e-05, |
|
"loss": 0.5412, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 24.836601307189543, |
|
"grad_norm": 6.781473636627197, |
|
"learning_rate": 3.240740740740741e-05, |
|
"loss": 0.4655, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 24.99346405228758, |
|
"eval_accuracy": 0.7205882352941176, |
|
"eval_loss": 0.7179181575775146, |
|
"eval_runtime": 21.5807, |
|
"eval_samples_per_second": 6.302, |
|
"eval_steps_per_second": 0.788, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 25.098039215686274, |
|
"grad_norm": 8.029053688049316, |
|
"learning_rate": 3.216374269005848e-05, |
|
"loss": 0.476, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 25.359477124183005, |
|
"grad_norm": 6.743289470672607, |
|
"learning_rate": 3.192007797270955e-05, |
|
"loss": 0.4561, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 25.62091503267974, |
|
"grad_norm": 8.96541690826416, |
|
"learning_rate": 3.167641325536063e-05, |
|
"loss": 0.4343, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 25.88235294117647, |
|
"grad_norm": 6.772902965545654, |
|
"learning_rate": 3.14327485380117e-05, |
|
"loss": 0.3848, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 25.986928104575163, |
|
"eval_accuracy": 0.8088235294117647, |
|
"eval_loss": 0.5074525475502014, |
|
"eval_runtime": 19.6885, |
|
"eval_samples_per_second": 6.908, |
|
"eval_steps_per_second": 0.863, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 26.143790849673202, |
|
"grad_norm": 8.97864055633545, |
|
"learning_rate": 3.118908382066277e-05, |
|
"loss": 0.4338, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 26.405228758169933, |
|
"grad_norm": 7.333850383758545, |
|
"learning_rate": 3.0945419103313844e-05, |
|
"loss": 0.4137, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 26.666666666666668, |
|
"grad_norm": 7.425909519195557, |
|
"learning_rate": 3.0701754385964913e-05, |
|
"loss": 0.4776, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 26.9281045751634, |
|
"grad_norm": 6.110713481903076, |
|
"learning_rate": 3.0458089668615986e-05, |
|
"loss": 0.3824, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 26.980392156862745, |
|
"eval_accuracy": 0.7426470588235294, |
|
"eval_loss": 0.6644823551177979, |
|
"eval_runtime": 22.0924, |
|
"eval_samples_per_second": 6.156, |
|
"eval_steps_per_second": 0.769, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 27.18954248366013, |
|
"grad_norm": 7.540946006774902, |
|
"learning_rate": 3.0214424951267055e-05, |
|
"loss": 0.3977, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 27.45098039215686, |
|
"grad_norm": 8.191174507141113, |
|
"learning_rate": 2.997076023391813e-05, |
|
"loss": 0.4704, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 27.712418300653596, |
|
"grad_norm": 10.757161140441895, |
|
"learning_rate": 2.9727095516569204e-05, |
|
"loss": 0.4258, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 27.973856209150327, |
|
"grad_norm": 7.706909656524658, |
|
"learning_rate": 2.9483430799220274e-05, |
|
"loss": 0.4901, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6985294117647058, |
|
"eval_loss": 0.7287850379943848, |
|
"eval_runtime": 23.7514, |
|
"eval_samples_per_second": 5.726, |
|
"eval_steps_per_second": 0.716, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 28.235294117647058, |
|
"grad_norm": 5.9652886390686035, |
|
"learning_rate": 2.9239766081871346e-05, |
|
"loss": 0.4311, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 28.49673202614379, |
|
"grad_norm": 8.528486251831055, |
|
"learning_rate": 2.8996101364522422e-05, |
|
"loss": 0.3898, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 28.758169934640524, |
|
"grad_norm": 8.398402214050293, |
|
"learning_rate": 2.875243664717349e-05, |
|
"loss": 0.397, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 28.99346405228758, |
|
"eval_accuracy": 0.7279411764705882, |
|
"eval_loss": 0.7250666618347168, |
|
"eval_runtime": 21.8786, |
|
"eval_samples_per_second": 6.216, |
|
"eval_steps_per_second": 0.777, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 29.019607843137255, |
|
"grad_norm": 5.904468536376953, |
|
"learning_rate": 2.850877192982456e-05, |
|
"loss": 0.3617, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 29.281045751633986, |
|
"grad_norm": 5.135966777801514, |
|
"learning_rate": 2.8265107212475634e-05, |
|
"loss": 0.4141, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 29.54248366013072, |
|
"grad_norm": 7.617613792419434, |
|
"learning_rate": 2.802144249512671e-05, |
|
"loss": 0.4057, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 29.80392156862745, |
|
"grad_norm": 9.047426223754883, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.3818, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 29.986928104575163, |
|
"eval_accuracy": 0.7941176470588235, |
|
"eval_loss": 0.6250298023223877, |
|
"eval_runtime": 21.1786, |
|
"eval_samples_per_second": 6.422, |
|
"eval_steps_per_second": 0.803, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 30.065359477124183, |
|
"grad_norm": 8.303121566772461, |
|
"learning_rate": 2.7534113060428852e-05, |
|
"loss": 0.3813, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 30.326797385620914, |
|
"grad_norm": 6.311716079711914, |
|
"learning_rate": 2.729044834307992e-05, |
|
"loss": 0.3967, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 30.58823529411765, |
|
"grad_norm": 6.965037822723389, |
|
"learning_rate": 2.7046783625730997e-05, |
|
"loss": 0.4462, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 30.84967320261438, |
|
"grad_norm": 9.514204978942871, |
|
"learning_rate": 2.680311890838207e-05, |
|
"loss": 0.3412, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 30.980392156862745, |
|
"eval_accuracy": 0.7279411764705882, |
|
"eval_loss": 0.7065072059631348, |
|
"eval_runtime": 21.6767, |
|
"eval_samples_per_second": 6.274, |
|
"eval_steps_per_second": 0.784, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 31.11111111111111, |
|
"grad_norm": 9.511887550354004, |
|
"learning_rate": 2.655945419103314e-05, |
|
"loss": 0.4346, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 31.372549019607842, |
|
"grad_norm": 8.251455307006836, |
|
"learning_rate": 2.6315789473684212e-05, |
|
"loss": 0.3567, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 31.633986928104576, |
|
"grad_norm": 8.693254470825195, |
|
"learning_rate": 2.6072124756335288e-05, |
|
"loss": 0.4056, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 31.895424836601308, |
|
"grad_norm": 6.94719123840332, |
|
"learning_rate": 2.5828460038986357e-05, |
|
"loss": 0.3627, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7426470588235294, |
|
"eval_loss": 0.6876980662345886, |
|
"eval_runtime": 21.8581, |
|
"eval_samples_per_second": 6.222, |
|
"eval_steps_per_second": 0.778, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 32.15686274509804, |
|
"grad_norm": 6.152271270751953, |
|
"learning_rate": 2.5584795321637427e-05, |
|
"loss": 0.294, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 32.41830065359477, |
|
"grad_norm": 9.349310874938965, |
|
"learning_rate": 2.53411306042885e-05, |
|
"loss": 0.361, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 32.6797385620915, |
|
"grad_norm": 8.874456405639648, |
|
"learning_rate": 2.5097465886939576e-05, |
|
"loss": 0.3795, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 32.94117647058823, |
|
"grad_norm": 6.504201412200928, |
|
"learning_rate": 2.485380116959064e-05, |
|
"loss": 0.3557, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 32.99346405228758, |
|
"eval_accuracy": 0.8529411764705882, |
|
"eval_loss": 0.4244842231273651, |
|
"eval_runtime": 21.3842, |
|
"eval_samples_per_second": 6.36, |
|
"eval_steps_per_second": 0.795, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 33.20261437908497, |
|
"grad_norm": 6.195323467254639, |
|
"learning_rate": 2.4610136452241718e-05, |
|
"loss": 0.3335, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 33.4640522875817, |
|
"grad_norm": 9.20754623413086, |
|
"learning_rate": 2.4366471734892787e-05, |
|
"loss": 0.3705, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 33.72549019607843, |
|
"grad_norm": 7.307880878448486, |
|
"learning_rate": 2.412280701754386e-05, |
|
"loss": 0.3626, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 33.98692810457516, |
|
"grad_norm": 11.704693794250488, |
|
"learning_rate": 2.3879142300194932e-05, |
|
"loss": 0.441, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 33.98692810457516, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6974249482154846, |
|
"eval_runtime": 21.7309, |
|
"eval_samples_per_second": 6.258, |
|
"eval_steps_per_second": 0.782, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 34.248366013071895, |
|
"grad_norm": 7.3577094078063965, |
|
"learning_rate": 2.3635477582846005e-05, |
|
"loss": 0.4363, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 34.509803921568626, |
|
"grad_norm": 4.33771276473999, |
|
"learning_rate": 2.3391812865497074e-05, |
|
"loss": 0.3408, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 34.77124183006536, |
|
"grad_norm": 7.208470821380615, |
|
"learning_rate": 2.314814814814815e-05, |
|
"loss": 0.3036, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 34.98039215686274, |
|
"eval_accuracy": 0.7426470588235294, |
|
"eval_loss": 0.6457906365394592, |
|
"eval_runtime": 22.2732, |
|
"eval_samples_per_second": 6.106, |
|
"eval_steps_per_second": 0.763, |
|
"step": 1338 |
|
}, |
|
{ |
|
"epoch": 35.032679738562095, |
|
"grad_norm": 7.787372589111328, |
|
"learning_rate": 2.290448343079922e-05, |
|
"loss": 0.3159, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 35.294117647058826, |
|
"grad_norm": 6.915111064910889, |
|
"learning_rate": 2.2660818713450292e-05, |
|
"loss": 0.3424, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 35.55555555555556, |
|
"grad_norm": 8.578511238098145, |
|
"learning_rate": 2.2417153996101365e-05, |
|
"loss": 0.3452, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 35.81699346405229, |
|
"grad_norm": 8.574066162109375, |
|
"learning_rate": 2.2173489278752438e-05, |
|
"loss": 0.3213, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7941176470588235, |
|
"eval_loss": 0.5578574538230896, |
|
"eval_runtime": 21.3424, |
|
"eval_samples_per_second": 6.372, |
|
"eval_steps_per_second": 0.797, |
|
"step": 1377 |
|
}, |
|
{ |
|
"epoch": 36.07843137254902, |
|
"grad_norm": 7.610787391662598, |
|
"learning_rate": 2.1929824561403507e-05, |
|
"loss": 0.4071, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 36.33986928104575, |
|
"grad_norm": 8.612502098083496, |
|
"learning_rate": 2.1686159844054583e-05, |
|
"loss": 0.2995, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 36.60130718954248, |
|
"grad_norm": 8.465425491333008, |
|
"learning_rate": 2.1442495126705653e-05, |
|
"loss": 0.3456, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 36.86274509803921, |
|
"grad_norm": 7.883861541748047, |
|
"learning_rate": 2.1198830409356725e-05, |
|
"loss": 0.402, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 36.99346405228758, |
|
"eval_accuracy": 0.8382352941176471, |
|
"eval_loss": 0.4578179121017456, |
|
"eval_runtime": 20.8376, |
|
"eval_samples_per_second": 6.527, |
|
"eval_steps_per_second": 0.816, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 37.12418300653595, |
|
"grad_norm": 8.734749794006348, |
|
"learning_rate": 2.0955165692007798e-05, |
|
"loss": 0.3742, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 37.38562091503268, |
|
"grad_norm": 10.651437759399414, |
|
"learning_rate": 2.071150097465887e-05, |
|
"loss": 0.3225, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 37.64705882352941, |
|
"grad_norm": 8.572120666503906, |
|
"learning_rate": 2.046783625730994e-05, |
|
"loss": 0.3638, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 37.908496732026144, |
|
"grad_norm": 7.853600978851318, |
|
"learning_rate": 2.0224171539961016e-05, |
|
"loss": 0.2897, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 37.98692810457516, |
|
"eval_accuracy": 0.7867647058823529, |
|
"eval_loss": 0.5368921756744385, |
|
"eval_runtime": 21.9563, |
|
"eval_samples_per_second": 6.194, |
|
"eval_steps_per_second": 0.774, |
|
"step": 1453 |
|
}, |
|
{ |
|
"epoch": 38.169934640522875, |
|
"grad_norm": 7.19242000579834, |
|
"learning_rate": 1.9980506822612085e-05, |
|
"loss": 0.3608, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 38.431372549019606, |
|
"grad_norm": 5.555720329284668, |
|
"learning_rate": 1.9736842105263158e-05, |
|
"loss": 0.3496, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 38.69281045751634, |
|
"grad_norm": 7.676553249359131, |
|
"learning_rate": 1.949317738791423e-05, |
|
"loss": 0.3087, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 38.95424836601307, |
|
"grad_norm": 9.612250328063965, |
|
"learning_rate": 1.9249512670565304e-05, |
|
"loss": 0.348, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 38.98039215686274, |
|
"eval_accuracy": 0.7941176470588235, |
|
"eval_loss": 0.6819477081298828, |
|
"eval_runtime": 20.1264, |
|
"eval_samples_per_second": 6.757, |
|
"eval_steps_per_second": 0.845, |
|
"step": 1491 |
|
}, |
|
{ |
|
"epoch": 39.21568627450981, |
|
"grad_norm": 5.8754167556762695, |
|
"learning_rate": 1.9005847953216373e-05, |
|
"loss": 0.2784, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 39.47712418300654, |
|
"grad_norm": 9.667128562927246, |
|
"learning_rate": 1.876218323586745e-05, |
|
"loss": 0.3254, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 39.73856209150327, |
|
"grad_norm": 7.417483329772949, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.3416, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 10.744291305541992, |
|
"learning_rate": 1.827485380116959e-05, |
|
"loss": 0.3929, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7867647058823529, |
|
"eval_loss": 0.5809765458106995, |
|
"eval_runtime": 22.4901, |
|
"eval_samples_per_second": 6.047, |
|
"eval_steps_per_second": 0.756, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 40.26143790849673, |
|
"grad_norm": 8.990880012512207, |
|
"learning_rate": 1.8031189083820664e-05, |
|
"loss": 0.3546, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 40.52287581699346, |
|
"grad_norm": 9.770576477050781, |
|
"learning_rate": 1.7787524366471736e-05, |
|
"loss": 0.344, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 40.78431372549019, |
|
"grad_norm": 7.653594017028809, |
|
"learning_rate": 1.7543859649122806e-05, |
|
"loss": 0.3173, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 40.99346405228758, |
|
"eval_accuracy": 0.7426470588235294, |
|
"eval_loss": 0.7875370979309082, |
|
"eval_runtime": 20.0479, |
|
"eval_samples_per_second": 6.784, |
|
"eval_steps_per_second": 0.848, |
|
"step": 1568 |
|
}, |
|
{ |
|
"epoch": 41.04575163398693, |
|
"grad_norm": 10.298673629760742, |
|
"learning_rate": 1.7300194931773882e-05, |
|
"loss": 0.3148, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 41.30718954248366, |
|
"grad_norm": 9.334877967834473, |
|
"learning_rate": 1.705653021442495e-05, |
|
"loss": 0.295, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 41.568627450980394, |
|
"grad_norm": 6.963639736175537, |
|
"learning_rate": 1.6812865497076024e-05, |
|
"loss": 0.3274, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 41.830065359477125, |
|
"grad_norm": 9.359763145446777, |
|
"learning_rate": 1.6569200779727097e-05, |
|
"loss": 0.3499, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 41.98692810457516, |
|
"eval_accuracy": 0.8014705882352942, |
|
"eval_loss": 0.5050505995750427, |
|
"eval_runtime": 19.6525, |
|
"eval_samples_per_second": 6.92, |
|
"eval_steps_per_second": 0.865, |
|
"step": 1606 |
|
}, |
|
{ |
|
"epoch": 42.091503267973856, |
|
"grad_norm": 8.404752731323242, |
|
"learning_rate": 1.632553606237817e-05, |
|
"loss": 0.3297, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 42.35294117647059, |
|
"grad_norm": 7.797074794769287, |
|
"learning_rate": 1.608187134502924e-05, |
|
"loss": 0.2784, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 42.61437908496732, |
|
"grad_norm": 6.273430347442627, |
|
"learning_rate": 1.5838206627680315e-05, |
|
"loss": 0.3084, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 42.87581699346405, |
|
"grad_norm": 8.596699714660645, |
|
"learning_rate": 1.5594541910331384e-05, |
|
"loss": 0.3053, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 42.98039215686274, |
|
"eval_accuracy": 0.7426470588235294, |
|
"eval_loss": 0.751028299331665, |
|
"eval_runtime": 19.7977, |
|
"eval_samples_per_second": 6.869, |
|
"eval_steps_per_second": 0.859, |
|
"step": 1644 |
|
}, |
|
{ |
|
"epoch": 43.13725490196079, |
|
"grad_norm": 10.001852989196777, |
|
"learning_rate": 1.5350877192982457e-05, |
|
"loss": 0.3818, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 43.39869281045752, |
|
"grad_norm": 6.75031042098999, |
|
"learning_rate": 1.5107212475633528e-05, |
|
"loss": 0.2612, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 43.66013071895425, |
|
"grad_norm": 4.408693790435791, |
|
"learning_rate": 1.4863547758284602e-05, |
|
"loss": 0.2901, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 43.92156862745098, |
|
"grad_norm": 9.371530532836914, |
|
"learning_rate": 1.4619883040935673e-05, |
|
"loss": 0.4109, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6528570055961609, |
|
"eval_runtime": 20.0083, |
|
"eval_samples_per_second": 6.797, |
|
"eval_steps_per_second": 0.85, |
|
"step": 1683 |
|
}, |
|
{ |
|
"epoch": 44.18300653594771, |
|
"grad_norm": 7.803284168243408, |
|
"learning_rate": 1.4376218323586746e-05, |
|
"loss": 0.2882, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 44.44444444444444, |
|
"grad_norm": 6.260004043579102, |
|
"learning_rate": 1.4132553606237817e-05, |
|
"loss": 0.333, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 44.705882352941174, |
|
"grad_norm": 11.425740242004395, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 0.3003, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 44.967320261437905, |
|
"grad_norm": 4.394773960113525, |
|
"learning_rate": 1.364522417153996e-05, |
|
"loss": 0.3846, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 44.99346405228758, |
|
"eval_accuracy": 0.7132352941176471, |
|
"eval_loss": 0.961465060710907, |
|
"eval_runtime": 19.8391, |
|
"eval_samples_per_second": 6.855, |
|
"eval_steps_per_second": 0.857, |
|
"step": 1721 |
|
}, |
|
{ |
|
"epoch": 45.22875816993464, |
|
"grad_norm": 11.91213321685791, |
|
"learning_rate": 1.3401559454191035e-05, |
|
"loss": 0.3757, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 45.490196078431374, |
|
"grad_norm": 7.775343894958496, |
|
"learning_rate": 1.3157894736842106e-05, |
|
"loss": 0.3259, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 45.751633986928105, |
|
"grad_norm": 4.623126029968262, |
|
"learning_rate": 1.2914230019493179e-05, |
|
"loss": 0.3222, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 45.98692810457516, |
|
"eval_accuracy": 0.6691176470588235, |
|
"eval_loss": 0.8888900279998779, |
|
"eval_runtime": 19.6212, |
|
"eval_samples_per_second": 6.931, |
|
"eval_steps_per_second": 0.866, |
|
"step": 1759 |
|
}, |
|
{ |
|
"epoch": 46.01307189542484, |
|
"grad_norm": 7.839041233062744, |
|
"learning_rate": 1.267056530214425e-05, |
|
"loss": 0.2718, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 46.27450980392157, |
|
"grad_norm": 10.323081016540527, |
|
"learning_rate": 1.242690058479532e-05, |
|
"loss": 0.2538, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 46.5359477124183, |
|
"grad_norm": 8.446836471557617, |
|
"learning_rate": 1.2183235867446393e-05, |
|
"loss": 0.2974, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 46.79738562091503, |
|
"grad_norm": 8.584868431091309, |
|
"learning_rate": 1.1939571150097466e-05, |
|
"loss": 0.3293, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 46.98039215686274, |
|
"eval_accuracy": 0.8676470588235294, |
|
"eval_loss": 0.46983805298805237, |
|
"eval_runtime": 20.3581, |
|
"eval_samples_per_second": 6.68, |
|
"eval_steps_per_second": 0.835, |
|
"step": 1797 |
|
}, |
|
{ |
|
"epoch": 47.05882352941177, |
|
"grad_norm": 6.30557918548584, |
|
"learning_rate": 1.1695906432748537e-05, |
|
"loss": 0.2915, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 47.3202614379085, |
|
"grad_norm": 6.145007610321045, |
|
"learning_rate": 1.145224171539961e-05, |
|
"loss": 0.2948, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 47.58169934640523, |
|
"grad_norm": 8.976215362548828, |
|
"learning_rate": 1.1208576998050683e-05, |
|
"loss": 0.3189, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 47.84313725490196, |
|
"grad_norm": 5.437533855438232, |
|
"learning_rate": 1.0964912280701754e-05, |
|
"loss": 0.293, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.8014705882352942, |
|
"eval_loss": 0.5995640754699707, |
|
"eval_runtime": 22.4921, |
|
"eval_samples_per_second": 6.047, |
|
"eval_steps_per_second": 0.756, |
|
"step": 1836 |
|
}, |
|
{ |
|
"epoch": 48.10457516339869, |
|
"grad_norm": 7.454949855804443, |
|
"learning_rate": 1.0721247563352826e-05, |
|
"loss": 0.3486, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 48.36601307189542, |
|
"grad_norm": 5.587437152862549, |
|
"learning_rate": 1.0477582846003899e-05, |
|
"loss": 0.3269, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 48.627450980392155, |
|
"grad_norm": 7.708277702331543, |
|
"learning_rate": 1.023391812865497e-05, |
|
"loss": 0.3436, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 48.888888888888886, |
|
"grad_norm": 9.139860153198242, |
|
"learning_rate": 9.990253411306043e-06, |
|
"loss": 0.2363, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 48.99346405228758, |
|
"eval_accuracy": 0.8308823529411765, |
|
"eval_loss": 0.5006946921348572, |
|
"eval_runtime": 22.6256, |
|
"eval_samples_per_second": 6.011, |
|
"eval_steps_per_second": 0.751, |
|
"step": 1874 |
|
}, |
|
{ |
|
"epoch": 49.150326797385624, |
|
"grad_norm": 10.596453666687012, |
|
"learning_rate": 9.746588693957115e-06, |
|
"loss": 0.3911, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 49.411764705882355, |
|
"grad_norm": 7.040250301361084, |
|
"learning_rate": 9.502923976608186e-06, |
|
"loss": 0.3426, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 49.673202614379086, |
|
"grad_norm": 8.111015319824219, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.3189, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 49.93464052287582, |
|
"grad_norm": 4.160747528076172, |
|
"learning_rate": 9.015594541910332e-06, |
|
"loss": 0.2811, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 49.98692810457516, |
|
"eval_accuracy": 0.7941176470588235, |
|
"eval_loss": 0.6748296022415161, |
|
"eval_runtime": 21.1042, |
|
"eval_samples_per_second": 6.444, |
|
"eval_steps_per_second": 0.806, |
|
"step": 1912 |
|
}, |
|
{ |
|
"epoch": 50.19607843137255, |
|
"grad_norm": 7.058052062988281, |
|
"learning_rate": 8.771929824561403e-06, |
|
"loss": 0.2881, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 50.45751633986928, |
|
"grad_norm": 5.950506210327148, |
|
"learning_rate": 8.528265107212476e-06, |
|
"loss": 0.2872, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 50.71895424836601, |
|
"grad_norm": 7.726902484893799, |
|
"learning_rate": 8.284600389863548e-06, |
|
"loss": 0.3346, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 50.98039215686274, |
|
"grad_norm": 7.297433376312256, |
|
"learning_rate": 8.04093567251462e-06, |
|
"loss": 0.2403, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 50.98039215686274, |
|
"eval_accuracy": 0.7941176470588235, |
|
"eval_loss": 0.6595063209533691, |
|
"eval_runtime": 21.555, |
|
"eval_samples_per_second": 6.309, |
|
"eval_steps_per_second": 0.789, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 51.24183006535948, |
|
"grad_norm": 7.367440700531006, |
|
"learning_rate": 7.797270955165692e-06, |
|
"loss": 0.2824, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 51.50326797385621, |
|
"grad_norm": 10.130813598632812, |
|
"learning_rate": 7.553606237816764e-06, |
|
"loss": 0.3367, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 51.76470588235294, |
|
"grad_norm": 7.926657199859619, |
|
"learning_rate": 7.3099415204678366e-06, |
|
"loss": 0.2553, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.7794117647058824, |
|
"eval_loss": 0.5987350344657898, |
|
"eval_runtime": 20.0722, |
|
"eval_samples_per_second": 6.776, |
|
"eval_steps_per_second": 0.847, |
|
"step": 1989 |
|
}, |
|
{ |
|
"epoch": 52.02614379084967, |
|
"grad_norm": 8.90279769897461, |
|
"learning_rate": 7.066276803118908e-06, |
|
"loss": 0.3068, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 52.287581699346404, |
|
"grad_norm": 11.345956802368164, |
|
"learning_rate": 6.82261208576998e-06, |
|
"loss": 0.2884, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 52.549019607843135, |
|
"grad_norm": 8.919641494750977, |
|
"learning_rate": 6.578947368421053e-06, |
|
"loss": 0.295, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 52.810457516339866, |
|
"grad_norm": 7.802643775939941, |
|
"learning_rate": 6.335282651072125e-06, |
|
"loss": 0.2959, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 52.99346405228758, |
|
"eval_accuracy": 0.8235294117647058, |
|
"eval_loss": 0.5459432005882263, |
|
"eval_runtime": 20.7402, |
|
"eval_samples_per_second": 6.557, |
|
"eval_steps_per_second": 0.82, |
|
"step": 2027 |
|
}, |
|
{ |
|
"epoch": 53.071895424836605, |
|
"grad_norm": 6.629459381103516, |
|
"learning_rate": 6.091617933723197e-06, |
|
"loss": 0.3203, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 53.333333333333336, |
|
"grad_norm": 7.91646671295166, |
|
"learning_rate": 5.8479532163742686e-06, |
|
"loss": 0.3267, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 53.59477124183007, |
|
"grad_norm": 5.941601276397705, |
|
"learning_rate": 5.604288499025341e-06, |
|
"loss": 0.2962, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 53.8562091503268, |
|
"grad_norm": 7.222002029418945, |
|
"learning_rate": 5.360623781676413e-06, |
|
"loss": 0.3066, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 53.98692810457516, |
|
"eval_accuracy": 0.7867647058823529, |
|
"eval_loss": 0.6198421716690063, |
|
"eval_runtime": 20.555, |
|
"eval_samples_per_second": 6.616, |
|
"eval_steps_per_second": 0.827, |
|
"step": 2065 |
|
}, |
|
{ |
|
"epoch": 54.11764705882353, |
|
"grad_norm": 6.425452709197998, |
|
"learning_rate": 5.116959064327485e-06, |
|
"loss": 0.2961, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 54.37908496732026, |
|
"grad_norm": 9.548799514770508, |
|
"learning_rate": 4.873294346978558e-06, |
|
"loss": 0.3363, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 54.64052287581699, |
|
"grad_norm": 7.3025970458984375, |
|
"learning_rate": 4.6296296296296296e-06, |
|
"loss": 0.3843, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 54.90196078431372, |
|
"grad_norm": 8.06686019897461, |
|
"learning_rate": 4.3859649122807014e-06, |
|
"loss": 0.2981, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 54.98039215686274, |
|
"eval_accuracy": 0.8308823529411765, |
|
"eval_loss": 0.4885517656803131, |
|
"eval_runtime": 22.3067, |
|
"eval_samples_per_second": 6.097, |
|
"eval_steps_per_second": 0.762, |
|
"step": 2103 |
|
}, |
|
{ |
|
"epoch": 55.16339869281046, |
|
"grad_norm": 7.39772891998291, |
|
"learning_rate": 4.142300194931774e-06, |
|
"loss": 0.315, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 55.42483660130719, |
|
"grad_norm": 6.316030502319336, |
|
"learning_rate": 3.898635477582846e-06, |
|
"loss": 0.2416, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 55.68627450980392, |
|
"grad_norm": 9.764727592468262, |
|
"learning_rate": 3.6549707602339183e-06, |
|
"loss": 0.3007, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 55.947712418300654, |
|
"grad_norm": 9.428828239440918, |
|
"learning_rate": 3.41130604288499e-06, |
|
"loss": 0.2658, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.7794117647058824, |
|
"eval_loss": 0.6422163844108582, |
|
"eval_runtime": 20.6286, |
|
"eval_samples_per_second": 6.593, |
|
"eval_steps_per_second": 0.824, |
|
"step": 2142 |
|
}, |
|
{ |
|
"epoch": 56.209150326797385, |
|
"grad_norm": 11.038329124450684, |
|
"learning_rate": 3.1676413255360624e-06, |
|
"loss": 0.3298, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 56.470588235294116, |
|
"grad_norm": 7.585492134094238, |
|
"learning_rate": 2.9239766081871343e-06, |
|
"loss": 0.3365, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 56.73202614379085, |
|
"grad_norm": 11.289450645446777, |
|
"learning_rate": 2.6803118908382066e-06, |
|
"loss": 0.3559, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 56.99346405228758, |
|
"grad_norm": 8.215266227722168, |
|
"learning_rate": 2.436647173489279e-06, |
|
"loss": 0.2371, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 56.99346405228758, |
|
"eval_accuracy": 0.8382352941176471, |
|
"eval_loss": 0.4999590516090393, |
|
"eval_runtime": 19.8968, |
|
"eval_samples_per_second": 6.835, |
|
"eval_steps_per_second": 0.854, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 57.254901960784316, |
|
"grad_norm": 6.436427116394043, |
|
"learning_rate": 2.1929824561403507e-06, |
|
"loss": 0.2826, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 57.51633986928105, |
|
"grad_norm": 10.186029434204102, |
|
"learning_rate": 1.949317738791423e-06, |
|
"loss": 0.3155, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 57.77777777777778, |
|
"grad_norm": 5.971257209777832, |
|
"learning_rate": 1.705653021442495e-06, |
|
"loss": 0.2331, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 57.98692810457516, |
|
"eval_accuracy": 0.7132352941176471, |
|
"eval_loss": 0.8853857517242432, |
|
"eval_runtime": 21.9358, |
|
"eval_samples_per_second": 6.2, |
|
"eval_steps_per_second": 0.775, |
|
"step": 2218 |
|
}, |
|
{ |
|
"epoch": 58.03921568627451, |
|
"grad_norm": 5.772573947906494, |
|
"learning_rate": 1.4619883040935671e-06, |
|
"loss": 0.2802, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 58.30065359477124, |
|
"grad_norm": 8.661057472229004, |
|
"learning_rate": 1.2183235867446394e-06, |
|
"loss": 0.286, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 58.56209150326797, |
|
"grad_norm": 7.350020408630371, |
|
"learning_rate": 9.746588693957115e-07, |
|
"loss": 0.2523, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 58.8235294117647, |
|
"grad_norm": 10.219001770019531, |
|
"learning_rate": 7.309941520467836e-07, |
|
"loss": 0.2777, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 58.98039215686274, |
|
"eval_accuracy": 0.8014705882352942, |
|
"eval_loss": 0.6190231442451477, |
|
"eval_runtime": 19.9884, |
|
"eval_samples_per_second": 6.804, |
|
"eval_steps_per_second": 0.85, |
|
"step": 2256 |
|
}, |
|
{ |
|
"epoch": 59.08496732026144, |
|
"grad_norm": 6.8238396644592285, |
|
"learning_rate": 4.873294346978557e-07, |
|
"loss": 0.2722, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 59.34640522875817, |
|
"grad_norm": 9.155200004577637, |
|
"learning_rate": 2.436647173489279e-07, |
|
"loss": 0.2591, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 59.6078431372549, |
|
"grad_norm": 7.341146469116211, |
|
"learning_rate": 0.0, |
|
"loss": 0.3047, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 59.6078431372549, |
|
"eval_accuracy": 0.7647058823529411, |
|
"eval_loss": 0.604828417301178, |
|
"eval_runtime": 20.7055, |
|
"eval_samples_per_second": 6.568, |
|
"eval_steps_per_second": 0.821, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 59.6078431372549, |
|
"step": 2280, |
|
"total_flos": 1.9206376588836864e+17, |
|
"train_loss": 0.5395637326073228, |
|
"train_runtime": 14275.0415, |
|
"train_samples_per_second": 5.145, |
|
"train_steps_per_second": 0.16 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2280, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 60, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.9206376588836864e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|