|
{ |
|
"best_metric": 0.9778142974527526, |
|
"best_model_checkpoint": "teacher-status-van-tiny-256-1-2/checkpoint-703", |
|
"epoch": 29.55223880597015, |
|
"eval_steps": 500, |
|
"global_step": 990, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 5.050505050505051e-06, |
|
"loss": 0.6865, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 1.0101010101010101e-05, |
|
"loss": 0.683, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.5151515151515153e-05, |
|
"loss": 0.6722, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.640083945435467, |
|
"eval_f1_score": 0.780550223928343, |
|
"eval_loss": 0.6498541831970215, |
|
"eval_precision": 0.640083945435467, |
|
"eval_recall": 1.0, |
|
"eval_runtime": 6.6964, |
|
"eval_samples_per_second": 142.315, |
|
"eval_steps_per_second": 1.195, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 2.0202020202020203e-05, |
|
"loss": 0.646, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 2.5252525252525256e-05, |
|
"loss": 0.6078, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.0303030303030306e-05, |
|
"loss": 0.5431, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7817418677859391, |
|
"eval_f1_score": 0.8531073446327684, |
|
"eval_loss": 0.4163793921470642, |
|
"eval_precision": 0.749379652605459, |
|
"eval_recall": 0.9901639344262295, |
|
"eval_runtime": 6.7638, |
|
"eval_samples_per_second": 140.898, |
|
"eval_steps_per_second": 1.183, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 3.535353535353535e-05, |
|
"loss": 0.497, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 4.0404040404040405e-05, |
|
"loss": 0.4483, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 0.4193, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 4.994388327721661e-05, |
|
"loss": 0.393, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"eval_accuracy": 0.887722980062959, |
|
"eval_f1_score": 0.9078380706287683, |
|
"eval_loss": 0.2832907438278198, |
|
"eval_precision": 0.956442831215971, |
|
"eval_recall": 0.8639344262295082, |
|
"eval_runtime": 6.6321, |
|
"eval_samples_per_second": 143.695, |
|
"eval_steps_per_second": 1.206, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"learning_rate": 4.938271604938271e-05, |
|
"loss": 0.3729, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 4.882154882154882e-05, |
|
"loss": 0.3616, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"learning_rate": 4.8260381593714935e-05, |
|
"loss": 0.354, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9275970619097587, |
|
"eval_f1_score": 0.9435813573180704, |
|
"eval_loss": 0.19304586946964264, |
|
"eval_precision": 0.9412724306688418, |
|
"eval_recall": 0.9459016393442623, |
|
"eval_runtime": 6.5785, |
|
"eval_samples_per_second": 144.865, |
|
"eval_steps_per_second": 1.216, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 4.7699214365881036e-05, |
|
"loss": 0.3214, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"learning_rate": 4.713804713804714e-05, |
|
"loss": 0.3272, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 4.6576879910213244e-05, |
|
"loss": 0.3007, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_accuracy": 0.9370409233997902, |
|
"eval_f1_score": 0.9510603588907015, |
|
"eval_loss": 0.15851934254169464, |
|
"eval_precision": 0.9464285714285714, |
|
"eval_recall": 0.9557377049180328, |
|
"eval_runtime": 6.4566, |
|
"eval_samples_per_second": 147.6, |
|
"eval_steps_per_second": 1.239, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 4.601571268237935e-05, |
|
"loss": 0.3006, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 5.37, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 0.2935, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 4.4893378226711566e-05, |
|
"loss": 0.3092, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 5.97, |
|
"learning_rate": 4.433221099887767e-05, |
|
"loss": 0.2898, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9464847848898216, |
|
"eval_f1_score": 0.9580936729663106, |
|
"eval_loss": 0.144499272108078, |
|
"eval_precision": 0.9604612850082372, |
|
"eval_recall": 0.9557377049180328, |
|
"eval_runtime": 6.0258, |
|
"eval_samples_per_second": 158.154, |
|
"eval_steps_per_second": 1.328, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 4.3771043771043774e-05, |
|
"loss": 0.2696, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 4.3209876543209875e-05, |
|
"loss": 0.3025, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 4.264870931537598e-05, |
|
"loss": 0.2824, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"eval_accuracy": 0.9464847848898216, |
|
"eval_f1_score": 0.9579554822753504, |
|
"eval_loss": 0.13527622818946838, |
|
"eval_precision": 0.9635157545605307, |
|
"eval_recall": 0.9524590163934427, |
|
"eval_runtime": 6.0159, |
|
"eval_samples_per_second": 158.414, |
|
"eval_steps_per_second": 1.33, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 4.208754208754209e-05, |
|
"loss": 0.2782, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 7.46, |
|
"learning_rate": 4.15263748597082e-05, |
|
"loss": 0.2629, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"learning_rate": 4.0965207631874305e-05, |
|
"loss": 0.2763, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9485834207764953, |
|
"eval_f1_score": 0.960323886639676, |
|
"eval_loss": 0.13593612611293793, |
|
"eval_precision": 0.9488, |
|
"eval_recall": 0.9721311475409836, |
|
"eval_runtime": 6.3017, |
|
"eval_samples_per_second": 151.228, |
|
"eval_steps_per_second": 1.269, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 8.06, |
|
"learning_rate": 4.0404040404040405e-05, |
|
"loss": 0.2698, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"learning_rate": 3.984287317620651e-05, |
|
"loss": 0.2621, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"learning_rate": 3.9281705948372613e-05, |
|
"loss": 0.2631, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 3.872053872053872e-05, |
|
"loss": 0.2473, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_accuracy": 0.9569779643231899, |
|
"eval_f1_score": 0.9664209664209664, |
|
"eval_loss": 0.12131528556346893, |
|
"eval_precision": 0.9656301145662848, |
|
"eval_recall": 0.9672131147540983, |
|
"eval_runtime": 6.691, |
|
"eval_samples_per_second": 142.431, |
|
"eval_steps_per_second": 1.196, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 9.25, |
|
"learning_rate": 3.815937149270483e-05, |
|
"loss": 0.2339, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 3.7598204264870936e-05, |
|
"loss": 0.249, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 9.85, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.2598, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9569779643231899, |
|
"eval_f1_score": 0.966530612244898, |
|
"eval_loss": 0.10907502472400665, |
|
"eval_precision": 0.9626016260162602, |
|
"eval_recall": 0.9704918032786886, |
|
"eval_runtime": 6.9942, |
|
"eval_samples_per_second": 136.256, |
|
"eval_steps_per_second": 1.144, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 10.15, |
|
"learning_rate": 3.6475869809203144e-05, |
|
"loss": 0.2497, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 10.45, |
|
"learning_rate": 3.5914702581369245e-05, |
|
"loss": 0.253, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 10.75, |
|
"learning_rate": 3.535353535353535e-05, |
|
"loss": 0.2476, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"eval_accuracy": 0.9632738719832109, |
|
"eval_f1_score": 0.9714285714285714, |
|
"eval_loss": 0.1040654331445694, |
|
"eval_precision": 0.967479674796748, |
|
"eval_recall": 0.9754098360655737, |
|
"eval_runtime": 6.6817, |
|
"eval_samples_per_second": 142.629, |
|
"eval_steps_per_second": 1.197, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 11.04, |
|
"learning_rate": 3.4792368125701466e-05, |
|
"loss": 0.2314, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 11.34, |
|
"learning_rate": 3.423120089786757e-05, |
|
"loss": 0.2422, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 11.64, |
|
"learning_rate": 3.3670033670033675e-05, |
|
"loss": 0.2406, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 11.94, |
|
"learning_rate": 3.3108866442199775e-05, |
|
"loss": 0.2376, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9601259181532005, |
|
"eval_f1_score": 0.9686468646864688, |
|
"eval_loss": 0.09974055737257004, |
|
"eval_precision": 0.9750830564784053, |
|
"eval_recall": 0.9622950819672131, |
|
"eval_runtime": 6.6227, |
|
"eval_samples_per_second": 143.899, |
|
"eval_steps_per_second": 1.208, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 3.254769921436588e-05, |
|
"loss": 0.2282, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 12.54, |
|
"learning_rate": 3.198653198653199e-05, |
|
"loss": 0.2264, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 12.84, |
|
"learning_rate": 3.14253647586981e-05, |
|
"loss": 0.2402, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 12.99, |
|
"eval_accuracy": 0.9622245540398741, |
|
"eval_f1_score": 0.9703947368421052, |
|
"eval_loss": 0.09718549996614456, |
|
"eval_precision": 0.9735973597359736, |
|
"eval_recall": 0.9672131147540983, |
|
"eval_runtime": 6.1102, |
|
"eval_samples_per_second": 155.969, |
|
"eval_steps_per_second": 1.309, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 13.13, |
|
"learning_rate": 3.08641975308642e-05, |
|
"loss": 0.2216, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 13.43, |
|
"learning_rate": 3.0303030303030306e-05, |
|
"loss": 0.2361, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 13.73, |
|
"learning_rate": 2.9741863075196406e-05, |
|
"loss": 0.2324, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9664218258132214, |
|
"eval_f1_score": 0.973941368078176, |
|
"eval_loss": 0.09498707950115204, |
|
"eval_precision": 0.9676375404530745, |
|
"eval_recall": 0.980327868852459, |
|
"eval_runtime": 6.0066, |
|
"eval_samples_per_second": 158.659, |
|
"eval_steps_per_second": 1.332, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 14.03, |
|
"learning_rate": 2.9180695847362517e-05, |
|
"loss": 0.2223, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 14.33, |
|
"learning_rate": 2.8619528619528618e-05, |
|
"loss": 0.213, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 14.63, |
|
"learning_rate": 2.8058361391694725e-05, |
|
"loss": 0.2206, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 14.93, |
|
"learning_rate": 2.7497194163860833e-05, |
|
"loss": 0.2256, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 14.99, |
|
"eval_accuracy": 0.9706190975865687, |
|
"eval_f1_score": 0.9770114942528735, |
|
"eval_loss": 0.09094734489917755, |
|
"eval_precision": 0.9786184210526315, |
|
"eval_recall": 0.9754098360655737, |
|
"eval_runtime": 5.9668, |
|
"eval_samples_per_second": 159.718, |
|
"eval_steps_per_second": 1.341, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 2.6936026936026937e-05, |
|
"loss": 0.209, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 15.52, |
|
"learning_rate": 2.6374859708193044e-05, |
|
"loss": 0.2254, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 15.82, |
|
"learning_rate": 2.581369248035915e-05, |
|
"loss": 0.21, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9622245540398741, |
|
"eval_f1_score": 0.970345963756178, |
|
"eval_loss": 0.09220422059297562, |
|
"eval_precision": 0.9751655629139073, |
|
"eval_recall": 0.9655737704918033, |
|
"eval_runtime": 6.5986, |
|
"eval_samples_per_second": 144.424, |
|
"eval_steps_per_second": 1.212, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 16.12, |
|
"learning_rate": 2.5252525252525256e-05, |
|
"loss": 0.2114, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 16.42, |
|
"learning_rate": 2.4691358024691357e-05, |
|
"loss": 0.1963, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 16.72, |
|
"learning_rate": 2.4130190796857467e-05, |
|
"loss": 0.217, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 16.99, |
|
"eval_accuracy": 0.9611752360965372, |
|
"eval_f1_score": 0.9695473251028807, |
|
"eval_loss": 0.09329694509506226, |
|
"eval_precision": 0.9735537190082645, |
|
"eval_recall": 0.9655737704918033, |
|
"eval_runtime": 6.5505, |
|
"eval_samples_per_second": 145.485, |
|
"eval_steps_per_second": 1.221, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 17.01, |
|
"learning_rate": 2.356902356902357e-05, |
|
"loss": 0.219, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 17.31, |
|
"learning_rate": 2.3007856341189676e-05, |
|
"loss": 0.2154, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 17.61, |
|
"learning_rate": 2.2446689113355783e-05, |
|
"loss": 0.2157, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 17.91, |
|
"learning_rate": 2.1885521885521887e-05, |
|
"loss": 0.2092, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9664218258132214, |
|
"eval_f1_score": 0.9738134206219312, |
|
"eval_loss": 0.08909059315919876, |
|
"eval_precision": 0.9722222222222222, |
|
"eval_recall": 0.9754098360655737, |
|
"eval_runtime": 5.9927, |
|
"eval_samples_per_second": 159.027, |
|
"eval_steps_per_second": 1.335, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 18.21, |
|
"learning_rate": 2.132435465768799e-05, |
|
"loss": 0.1979, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 18.51, |
|
"learning_rate": 2.07631874298541e-05, |
|
"loss": 0.1975, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 18.81, |
|
"learning_rate": 2.0202020202020203e-05, |
|
"loss": 0.2063, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 18.99, |
|
"eval_accuracy": 0.9653725078698846, |
|
"eval_f1_score": 0.972972972972973, |
|
"eval_loss": 0.09130384773015976, |
|
"eval_precision": 0.972176759410802, |
|
"eval_recall": 0.9737704918032787, |
|
"eval_runtime": 5.9748, |
|
"eval_samples_per_second": 159.504, |
|
"eval_steps_per_second": 1.339, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 19.1, |
|
"learning_rate": 1.9640852974186307e-05, |
|
"loss": 0.215, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 19.4, |
|
"learning_rate": 1.9079685746352414e-05, |
|
"loss": 0.214, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.2072, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 1.7957351290684622e-05, |
|
"loss": 0.2217, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9643231899265478, |
|
"eval_f1_score": 0.971993410214168, |
|
"eval_loss": 0.09169190376996994, |
|
"eval_precision": 0.9768211920529801, |
|
"eval_recall": 0.9672131147540983, |
|
"eval_runtime": 6.2954, |
|
"eval_samples_per_second": 151.381, |
|
"eval_steps_per_second": 1.271, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 20.3, |
|
"learning_rate": 1.7396184062850733e-05, |
|
"loss": 0.1996, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 20.6, |
|
"learning_rate": 1.6835016835016837e-05, |
|
"loss": 0.1968, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 20.9, |
|
"learning_rate": 1.627384960718294e-05, |
|
"loss": 0.1952, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 20.99, |
|
"eval_accuracy": 0.9716684155299056, |
|
"eval_f1_score": 0.9778142974527526, |
|
"eval_loss": 0.0859055444598198, |
|
"eval_precision": 0.9802306425041186, |
|
"eval_recall": 0.9754098360655737, |
|
"eval_runtime": 6.6363, |
|
"eval_samples_per_second": 143.604, |
|
"eval_steps_per_second": 1.205, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 21.19, |
|
"learning_rate": 1.571268237934905e-05, |
|
"loss": 0.2051, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 21.49, |
|
"learning_rate": 1.5151515151515153e-05, |
|
"loss": 0.1927, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 21.79, |
|
"learning_rate": 1.4590347923681259e-05, |
|
"loss": 0.2068, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.968520461699895, |
|
"eval_f1_score": 0.9754500818330606, |
|
"eval_loss": 0.09068847447633743, |
|
"eval_precision": 0.9738562091503268, |
|
"eval_recall": 0.9770491803278688, |
|
"eval_runtime": 6.1893, |
|
"eval_samples_per_second": 153.975, |
|
"eval_steps_per_second": 1.293, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 22.09, |
|
"learning_rate": 1.4029180695847363e-05, |
|
"loss": 0.1957, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 22.39, |
|
"learning_rate": 1.3468013468013468e-05, |
|
"loss": 0.2021, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 22.69, |
|
"learning_rate": 1.2906846240179574e-05, |
|
"loss": 0.202, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"learning_rate": 1.2345679012345678e-05, |
|
"loss": 0.1914, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 22.99, |
|
"eval_accuracy": 0.9695697796432319, |
|
"eval_f1_score": 0.9762878168438266, |
|
"eval_loss": 0.08471482992172241, |
|
"eval_precision": 0.9738988580750407, |
|
"eval_recall": 0.978688524590164, |
|
"eval_runtime": 5.9499, |
|
"eval_samples_per_second": 160.172, |
|
"eval_steps_per_second": 1.345, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 23.28, |
|
"learning_rate": 1.1784511784511786e-05, |
|
"loss": 0.1937, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 23.58, |
|
"learning_rate": 1.1223344556677892e-05, |
|
"loss": 0.1876, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 23.88, |
|
"learning_rate": 1.0662177328843996e-05, |
|
"loss": 0.1961, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.968520461699895, |
|
"eval_f1_score": 0.9754500818330606, |
|
"eval_loss": 0.08703567832708359, |
|
"eval_precision": 0.9738562091503268, |
|
"eval_recall": 0.9770491803278688, |
|
"eval_runtime": 6.0627, |
|
"eval_samples_per_second": 157.191, |
|
"eval_steps_per_second": 1.32, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 24.18, |
|
"learning_rate": 1.0101010101010101e-05, |
|
"loss": 0.1817, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 24.48, |
|
"learning_rate": 9.539842873176207e-06, |
|
"loss": 0.1869, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 24.78, |
|
"learning_rate": 8.978675645342311e-06, |
|
"loss": 0.1911, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 24.99, |
|
"eval_accuracy": 0.9664218258132214, |
|
"eval_f1_score": 0.9738562091503268, |
|
"eval_loss": 0.08837948739528656, |
|
"eval_precision": 0.9706840390879479, |
|
"eval_recall": 0.9770491803278688, |
|
"eval_runtime": 6.585, |
|
"eval_samples_per_second": 144.722, |
|
"eval_steps_per_second": 1.215, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 25.07, |
|
"learning_rate": 8.417508417508419e-06, |
|
"loss": 0.1904, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 25.37, |
|
"learning_rate": 7.856341189674524e-06, |
|
"loss": 0.1831, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 25.67, |
|
"learning_rate": 7.295173961840629e-06, |
|
"loss": 0.184, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 25.97, |
|
"learning_rate": 6.734006734006734e-06, |
|
"loss": 0.1961, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.968520461699895, |
|
"eval_f1_score": 0.9753694581280787, |
|
"eval_loss": 0.08696460723876953, |
|
"eval_precision": 0.9769736842105263, |
|
"eval_recall": 0.9737704918032787, |
|
"eval_runtime": 6.5592, |
|
"eval_samples_per_second": 145.293, |
|
"eval_steps_per_second": 1.22, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 26.27, |
|
"learning_rate": 6.172839506172839e-06, |
|
"loss": 0.1988, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 26.57, |
|
"learning_rate": 5.611672278338946e-06, |
|
"loss": 0.1796, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 26.87, |
|
"learning_rate": 5.050505050505051e-06, |
|
"loss": 0.1978, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 26.99, |
|
"eval_accuracy": 0.968520461699895, |
|
"eval_f1_score": 0.9754098360655737, |
|
"eval_loss": 0.08714743703603745, |
|
"eval_precision": 0.9754098360655737, |
|
"eval_recall": 0.9754098360655737, |
|
"eval_runtime": 5.8888, |
|
"eval_samples_per_second": 161.832, |
|
"eval_steps_per_second": 1.359, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 27.16, |
|
"learning_rate": 4.489337822671156e-06, |
|
"loss": 0.2099, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 27.46, |
|
"learning_rate": 3.928170594837262e-06, |
|
"loss": 0.1867, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 27.76, |
|
"learning_rate": 3.367003367003367e-06, |
|
"loss": 0.1854, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.968520461699895, |
|
"eval_f1_score": 0.9754500818330606, |
|
"eval_loss": 0.08582841604948044, |
|
"eval_precision": 0.9738562091503268, |
|
"eval_recall": 0.9770491803278688, |
|
"eval_runtime": 5.9622, |
|
"eval_samples_per_second": 159.839, |
|
"eval_steps_per_second": 1.342, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 28.06, |
|
"learning_rate": 2.805836139169473e-06, |
|
"loss": 0.1931, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 28.36, |
|
"learning_rate": 2.244668911335578e-06, |
|
"loss": 0.1957, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 28.66, |
|
"learning_rate": 1.6835016835016836e-06, |
|
"loss": 0.1849, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 28.96, |
|
"learning_rate": 1.122334455667789e-06, |
|
"loss": 0.1733, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_accuracy": 0.968520461699895, |
|
"eval_f1_score": 0.9753694581280787, |
|
"eval_loss": 0.08600697666406631, |
|
"eval_precision": 0.9769736842105263, |
|
"eval_recall": 0.9737704918032787, |
|
"eval_runtime": 6.5847, |
|
"eval_samples_per_second": 144.729, |
|
"eval_steps_per_second": 1.215, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 29.25, |
|
"learning_rate": 5.611672278338944e-07, |
|
"loss": 0.202, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 29.55, |
|
"learning_rate": 0.0, |
|
"loss": 0.1762, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 29.55, |
|
"eval_accuracy": 0.9664218258132214, |
|
"eval_f1_score": 0.9737704918032787, |
|
"eval_loss": 0.0858435109257698, |
|
"eval_precision": 0.9737704918032787, |
|
"eval_recall": 0.9737704918032787, |
|
"eval_runtime": 6.517, |
|
"eval_samples_per_second": 146.233, |
|
"eval_steps_per_second": 1.228, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 29.55, |
|
"step": 990, |
|
"total_flos": 1.1503239286004122e+18, |
|
"train_loss": 0.2626483961789295, |
|
"train_runtime": 6214.0737, |
|
"train_samples_per_second": 41.379, |
|
"train_steps_per_second": 0.159 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 990, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 1.1503239286004122e+18, |
|
"train_batch_size": 128, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|