{ "best_metric": 2.003035545349121, "best_model_checkpoint": "/mnt/default/projects/sca-xiaoke-v3/amlt-results/7301932201.25563-cd1e6021-6ea9-4835-8578-ba26f723a708/checkpoint-100000", "epoch": 3.673229503379371, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "_do_backward_in_ms": 13833.612655987963, "_prepare_inputs_in_ms": 9.499168023467064, "compute_loss_in_ms": 1344.5582571439445, "epoch": 0.0, "learning_rate/full": 0.0, "loss": 9.9783, "step": 1, "training_step_in_ms": 15198.054818203673 }, { "epoch": 0.0, "eval_objects365-local-v2-validation_loss": 9.392258644104004, "eval_objects365-local-v2-validation_runtime": 16.1869, "eval_objects365-local-v2-validation_samples_per_second": 49.423, "eval_objects365-local-v2-validation_steps_per_second": 0.803, "step": 1 }, { "_do_backward_in_ms": 2550.187955102608, "_prepare_inputs_in_ms": 12.46627290174549, "compute_loss_in_ms": 289.9478549773164, "epoch": 0.04, "learning_rate/full": 9.998519814211118e-05, "loss": 3.8861, "step": 1000, "training_step_in_ms": 2849.577255269468 }, { "_do_backward_in_ms": 2556.9965168242343, "_prepare_inputs_in_ms": 3.3681516400538385, "compute_loss_in_ms": 289.68224829342216, "epoch": 0.07, "learning_rate/full": 9.992214555651498e-05, "loss": 2.6909, "step": 2000, "training_step_in_ms": 2856.0172235199716 }, { "_do_backward_in_ms": 2544.9560291268863, "_prepare_inputs_in_ms": 3.326028081588447, "compute_loss_in_ms": 289.6684127182234, "epoch": 0.11, "learning_rate/full": 9.980962819544264e-05, "loss": 2.4899, "step": 3000, "training_step_in_ms": 2843.9263667755295 }, { "_do_backward_in_ms": 2553.390086729545, "_prepare_inputs_in_ms": 3.3251649560406804, "compute_loss_in_ms": 289.7164152129553, "epoch": 0.15, "learning_rate/full": 9.964775754540861e-05, "loss": 2.364, "step": 4000, "training_step_in_ms": 2852.3637848414946 }, { "_do_backward_in_ms": 2533.654973218916, "_prepare_inputs_in_ms": 3.3234398325439543, "compute_loss_in_ms": 289.49879492027685, "epoch": 0.18, "learning_rate/full": 9.94366939940579e-05, "loss": 2.2671, "step": 5000, "training_step_in_ms": 2832.4309401281644 }, { "epoch": 0.18, "eval_objects365-local-v2-validation_loss": 3.2006750106811523, "eval_objects365-local-v2-validation_runtime": 15.8807, "eval_objects365-local-v2-validation_samples_per_second": 50.376, "eval_objects365-local-v2-validation_steps_per_second": 0.819, "step": 5000 }, { "_do_backward_in_ms": 2536.1245670726057, "_prepare_inputs_in_ms": 13.602690789379688, "compute_loss_in_ms": 290.3555030166838, "epoch": 0.22, "learning_rate/full": 9.917664667124788e-05, "loss": 2.2035, "step": 6000, "training_step_in_ms": 2835.8918179108296 }, { "_do_backward_in_ms": 2521.147600293625, "_prepare_inputs_in_ms": 3.3214590828865767, "compute_loss_in_ms": 290.27019435027614, "epoch": 0.26, "learning_rate/full": 9.886787324183434e-05, "loss": 2.1482, "step": 7000, "training_step_in_ms": 2820.6920346897095 }, { "_do_backward_in_ms": 2548.451570129022, "_prepare_inputs_in_ms": 3.319471804657951, "compute_loss_in_ms": 289.1897811254021, "epoch": 0.29, "learning_rate/full": 9.851067965036731e-05, "loss": 2.0894, "step": 8000, "training_step_in_ms": 2846.943020476494 }, { "_do_backward_in_ms": 2545.683652488282, "_prepare_inputs_in_ms": 3.316459863912314, "compute_loss_in_ms": 289.3646022947505, "epoch": 0.33, "learning_rate/full": 9.81058489571687e-05, "loss": 2.0449, "step": 9000, "training_step_in_ms": 2844.350300100632 }, { "_do_backward_in_ms": 2587.985424251994, "_prepare_inputs_in_ms": 3.3290419806726277, "compute_loss_in_ms": 289.5182610661723, "epoch": 0.37, "learning_rate/full": 9.765344841950912e-05, "loss": 2.0159, "step": 10000, "training_step_in_ms": 2886.8384181782603 }, { "epoch": 0.37, "eval_objects365-local-v2-validation_loss": 2.7690136432647705, "eval_objects365-local-v2-validation_runtime": 9.6182, "eval_objects365-local-v2-validation_samples_per_second": 83.175, "eval_objects365-local-v2-validation_steps_per_second": 1.352, "step": 10000 }, { "_do_backward_in_ms": 2580.9500538480934, "_prepare_inputs_in_ms": 7.9110309222817925, "compute_loss_in_ms": 289.0381096436588, "epoch": 0.4, "learning_rate/full": 9.715392539719363e-05, "loss": 1.9885, "step": 11000, "training_step_in_ms": 2879.298744171858 }, { "_do_backward_in_ms": 2542.390609878581, "_prepare_inputs_in_ms": 3.3009646027348936, "compute_loss_in_ms": 289.6751387268305, "epoch": 0.44, "learning_rate/full": 9.660720396490479e-05, "loss": 1.9603, "step": 12000, "training_step_in_ms": 2841.272174295038 }, { "_do_backward_in_ms": 2512.176790253725, "_prepare_inputs_in_ms": 3.362945111002773, "compute_loss_in_ms": 289.53350385534577, "epoch": 0.48, "learning_rate/full": 9.601491810524181e-05, "loss": 1.9403, "step": 13000, "training_step_in_ms": 2811.0723278834485 }, { "_do_backward_in_ms": 2531.5591839374974, "_prepare_inputs_in_ms": 3.3155137847643346, "compute_loss_in_ms": 290.233249894809, "epoch": 0.51, "learning_rate/full": 9.537646901827713e-05, "loss": 1.9229, "step": 14000, "training_step_in_ms": 2831.124616945861 }, { "_do_backward_in_ms": 2558.8225115824025, "_prepare_inputs_in_ms": 3.351889422861859, "compute_loss_in_ms": 289.55780304642394, "epoch": 0.55, "learning_rate/full": 9.469376483242311e-05, "loss": 1.9078, "step": 15000, "training_step_in_ms": 2857.7440589836333 }, { "epoch": 0.55, "eval_objects365-local-v2-validation_loss": 2.566831588745117, "eval_objects365-local-v2-validation_runtime": 12.0469, "eval_objects365-local-v2-validation_samples_per_second": 66.407, "eval_objects365-local-v2-validation_steps_per_second": 1.079, "step": 15000 }, { "_do_backward_in_ms": 2569.1758075398393, "_prepare_inputs_in_ms": 9.903900120278632, "compute_loss_in_ms": 289.8735368660185, "epoch": 0.59, "learning_rate/full": 9.396611533498878e-05, "loss": 1.8953, "step": 16000, "training_step_in_ms": 2868.428954576142 }, { "_do_backward_in_ms": 2524.4527391090523, "_prepare_inputs_in_ms": 3.3152836377266794, "compute_loss_in_ms": 289.7500243561808, "epoch": 0.62, "learning_rate/full": 9.319569524704217e-05, "loss": 1.8846, "step": 17000, "training_step_in_ms": 2823.4884984181263 }, { "_do_backward_in_ms": 2517.142957175616, "_prepare_inputs_in_ms": 3.341707782819867, "compute_loss_in_ms": 289.59233529726043, "epoch": 0.66, "learning_rate/full": 9.238172567527468e-05, "loss": 1.8726, "step": 18000, "training_step_in_ms": 2816.0720933733974 }, { "_do_backward_in_ms": 2532.7779561954085, "_prepare_inputs_in_ms": 3.3180122550111264, "compute_loss_in_ms": 289.8389355558902, "epoch": 0.7, "learning_rate/full": 9.152751594369358e-05, "loss": 1.864, "step": 19000, "training_step_in_ms": 2831.926313831471 }, { "_do_backward_in_ms": 2530.6621135415044, "_prepare_inputs_in_ms": 3.3357447627931833, "compute_loss_in_ms": 289.85987694049254, "epoch": 0.73, "learning_rate/full": 9.063048903303299e-05, "loss": 1.8557, "step": 20000, "training_step_in_ms": 2829.814629596658 }, { "epoch": 0.73, "eval_objects365-local-v2-validation_loss": 2.4183120727539062, "eval_objects365-local-v2-validation_runtime": 19.7661, "eval_objects365-local-v2-validation_samples_per_second": 40.473, "eval_objects365-local-v2-validation_steps_per_second": 0.658, "step": 20000 }, { "_do_backward_in_ms": 2476.3556728472468, "_prepare_inputs_in_ms": 17.271267849380134, "compute_loss_in_ms": 289.4574518314115, "epoch": 0.77, "learning_rate/full": 8.969416096468137e-05, "loss": 1.8426, "step": 21000, "training_step_in_ms": 2775.230575547088 }, { "_do_backward_in_ms": 2468.883111936506, "_prepare_inputs_in_ms": 3.332945456728339, "compute_loss_in_ms": 289.88871885929257, "epoch": 0.81, "learning_rate/full": 8.871858101523774e-05, "loss": 1.8415, "step": 22000, "training_step_in_ms": 2768.1600090207066 }, { "_do_backward_in_ms": 2506.8567285528407, "_prepare_inputs_in_ms": 3.3408744835760444, "compute_loss_in_ms": 289.73892450495623, "epoch": 0.84, "learning_rate/full": 8.770471389455464e-05, "loss": 1.8293, "step": 23000, "training_step_in_ms": 2805.954835511511 }, { "_do_backward_in_ms": 2541.85831053718, "_prepare_inputs_in_ms": 3.357209531823173, "compute_loss_in_ms": 290.00427257712, "epoch": 0.88, "learning_rate/full": 8.665356217305291e-05, "loss": 1.8266, "step": 24000, "training_step_in_ms": 2841.2785381632857 }, { "_do_backward_in_ms": 2533.210183262825, "_prepare_inputs_in_ms": 3.364060287596658, "compute_loss_in_ms": 289.9935355405323, "epoch": 0.92, "learning_rate/full": 8.556616529032215e-05, "loss": 1.8162, "step": 25000, "training_step_in_ms": 2832.5927242138423 }, { "epoch": 0.92, "eval_objects365-local-v2-validation_loss": 2.3316762447357178, "eval_objects365-local-v2-validation_runtime": 10.2829, "eval_objects365-local-v2-validation_samples_per_second": 77.799, "eval_objects365-local-v2-validation_steps_per_second": 1.264, "step": 25000 }, { "_do_backward_in_ms": 2552.305114510702, "_prepare_inputs_in_ms": 8.565855091612377, "compute_loss_in_ms": 289.73671219767664, "epoch": 0.96, "learning_rate/full": 8.444359852726274e-05, "loss": 1.8117, "step": 26000, "training_step_in_ms": 2851.485752185108 }, { "_do_backward_in_ms": 2533.5027522125747, "_prepare_inputs_in_ms": 3.3632643420714885, "compute_loss_in_ms": 289.90648507210426, "epoch": 0.99, "learning_rate/full": 8.328579747384175e-05, "loss": 1.806, "step": 27000, "training_step_in_ms": 2832.807456281269 }, { "_do_backward_in_ms": 2546.2225131660234, "_prepare_inputs_in_ms": 3.3682647191453725, "compute_loss_in_ms": 291.1998807019554, "epoch": 1.03, "learning_rate/full": 8.209501557634378e-05, "loss": 1.7955, "step": 28000, "training_step_in_ms": 2846.875886055641 }, { "_do_backward_in_ms": 2583.1195299711544, "_prepare_inputs_in_ms": 3.3649198710918427, "compute_loss_in_ms": 290.97770567121916, "epoch": 1.07, "learning_rate/full": 8.087367077395005e-05, "loss": 1.7877, "step": 29000, "training_step_in_ms": 2883.5424125664867 }, { "_do_backward_in_ms": 2570.092649807455, "_prepare_inputs_in_ms": 3.4090082803741097, "compute_loss_in_ms": 291.62430305662565, "epoch": 1.1, "learning_rate/full": 7.962179630107982e-05, "loss": 1.786, "step": 30000, "training_step_in_ms": 2871.2795688530896 }, { "epoch": 1.1, "eval_objects365-local-v2-validation_loss": 2.2865772247314453, "eval_objects365-local-v2-validation_runtime": 8.6589, "eval_objects365-local-v2-validation_samples_per_second": 92.391, "eval_objects365-local-v2-validation_steps_per_second": 1.501, "step": 30000 }, { "_do_backward_in_ms": 721.1331692694221, "_prepare_inputs_in_ms": 3.2962444906588644, "compute_loss_in_ms": 299.71373338252306, "epoch": 1.14, "learning_rate/full": 9.998418572322853e-05, "loss": 1.7871, "step": 31000, "training_step_in_ms": 1029.60611718148 }, { "_do_backward_in_ms": 729.436703273328, "_prepare_inputs_in_ms": 3.258270466234535, "compute_loss_in_ms": 298.0247277948074, "epoch": 1.18, "learning_rate/full": 9.991993511089866e-05, "loss": 1.7845, "step": 32000, "training_step_in_ms": 1036.1627226730343 }, { "_do_backward_in_ms": 703.6630942693446, "_prepare_inputs_in_ms": 3.2456943639554083, "compute_loss_in_ms": 297.64328660978936, "epoch": 1.21, "learning_rate/full": 9.98061823549655e-05, "loss": 1.7817, "step": 33000, "training_step_in_ms": 1009.9646131193731 }, { "_do_backward_in_ms": 676.1822207763325, "_prepare_inputs_in_ms": 3.2518609322141856, "compute_loss_in_ms": 297.877519285772, "epoch": 1.25, "learning_rate/full": 9.964326742751142e-05, "loss": 1.7795, "step": 34000, "training_step_in_ms": 982.7506944458 }, { "_do_backward_in_ms": 624.6346801738255, "_prepare_inputs_in_ms": 3.2795886190142483, "compute_loss_in_ms": 298.1166247774381, "epoch": 1.29, "learning_rate/full": 9.943126236733435e-05, "loss": 1.7769, "step": 35000, "training_step_in_ms": 931.5135621828958 }, { "epoch": 1.29, "eval_objects365-local-v2-validation_loss": 2.2724013328552246, "eval_objects365-local-v2-validation_runtime": 12.4784, "eval_objects365-local-v2-validation_samples_per_second": 64.111, "eval_objects365-local-v2-validation_steps_per_second": 1.042, "step": 35000 }, { "_do_backward_in_ms": 655.5177808874287, "_prepare_inputs_in_ms": 7.8633947225300425, "compute_loss_in_ms": 297.8193518089228, "epoch": 1.32, "learning_rate/full": 9.917037681729384e-05, "loss": 1.7745, "step": 36000, "training_step_in_ms": 962.1334383783396 }, { "_do_backward_in_ms": 699.8656730484217, "_prepare_inputs_in_ms": 3.240072426153347, "compute_loss_in_ms": 298.73781585809775, "epoch": 1.36, "learning_rate/full": 9.886053467655043e-05, "loss": 1.7686, "step": 37000, "training_step_in_ms": 1007.3340494644362 }, { "_do_backward_in_ms": 698.6605496255215, "_prepare_inputs_in_ms": 3.2106620045378804, "compute_loss_in_ms": 299.0653761769645, "epoch": 1.4, "learning_rate/full": 9.850266196818751e-05, "loss": 1.7686, "step": 38000, "training_step_in_ms": 1006.4037391303573 }, { "_do_backward_in_ms": 724.4765353850089, "_prepare_inputs_in_ms": 3.3089412197005004, "compute_loss_in_ms": 298.82350925635546, "epoch": 1.43, "learning_rate/full": 9.80968270237185e-05, "loss": 1.763, "step": 39000, "training_step_in_ms": 1032.1099421884865 }, { "_do_backward_in_ms": 681.9796452447772, "_prepare_inputs_in_ms": 3.263611613307148, "compute_loss_in_ms": 298.8235752664041, "epoch": 1.47, "learning_rate/full": 9.764343115619788e-05, "loss": 1.7614, "step": 40000, "training_step_in_ms": 989.5278342715465 }, { "epoch": 1.47, "eval_objects365-local-v2-validation_loss": 2.228322982788086, "eval_objects365-local-v2-validation_runtime": 16.6211, "eval_objects365-local-v2-validation_samples_per_second": 48.132, "eval_objects365-local-v2-validation_steps_per_second": 0.782, "step": 40000 }, { "_do_backward_in_ms": 720.4739852924831, "_prepare_inputs_in_ms": 12.448386072667512, "compute_loss_in_ms": 298.166242549251, "epoch": 1.51, "learning_rate/full": 9.714292270967042e-05, "loss": 1.7603, "step": 41000, "training_step_in_ms": 1027.4099870913196 }, { "_do_backward_in_ms": 658.6267548131291, "_prepare_inputs_in_ms": 3.201066299341619, "compute_loss_in_ms": 298.838454146171, "epoch": 1.54, "learning_rate/full": 9.659579661582255e-05, "loss": 1.7573, "step": 42000, "training_step_in_ms": 966.1794079060201 }, { "_do_backward_in_ms": 676.473127261037, "_prepare_inputs_in_ms": 3.228976390324533, "compute_loss_in_ms": 298.2980008148588, "epoch": 1.58, "learning_rate/full": 9.600197721584953e-05, "loss": 1.7545, "step": 43000, "training_step_in_ms": 983.4983903854154 }, { "_do_backward_in_ms": 697.463578726165, "_prepare_inputs_in_ms": 3.195912489667535, "compute_loss_in_ms": 299.1437525388319, "epoch": 1.62, "learning_rate/full": 9.536323925372398e-05, "loss": 1.7526, "step": 44000, "training_step_in_ms": 1005.2843026786577 }, { "_do_backward_in_ms": 813.7108269445598, "_prepare_inputs_in_ms": 3.206419989466667, "compute_loss_in_ms": 298.0525588088203, "epoch": 1.65, "learning_rate/full": 9.467964349816328e-05, "loss": 1.7477, "step": 45000, "training_step_in_ms": 1120.4696311727166 }, { "epoch": 1.65, "eval_objects365-local-v2-validation_loss": 2.1873245239257812, "eval_objects365-local-v2-validation_runtime": 12.2979, "eval_objects365-local-v2-validation_samples_per_second": 65.052, "eval_objects365-local-v2-validation_steps_per_second": 1.057, "step": 45000 }, { "_do_backward_in_ms": 708.7151438989677, "_prepare_inputs_in_ms": 8.210845838180447, "compute_loss_in_ms": 297.9545111299988, "epoch": 1.69, "learning_rate/full": 9.395186592816932e-05, "loss": 1.7454, "step": 46000, "training_step_in_ms": 1015.4890209392179 }, { "_do_backward_in_ms": 677.8432183256373, "_prepare_inputs_in_ms": 3.263900319347158, "compute_loss_in_ms": 297.8792646545917, "epoch": 1.73, "learning_rate/full": 9.31806262122764e-05, "loss": 1.7443, "step": 47000, "training_step_in_ms": 984.4030563381966 }, { "_do_backward_in_ms": 720.129483740544, "_prepare_inputs_in_ms": 3.2109336624853313, "compute_loss_in_ms": 297.79731379216537, "epoch": 1.76, "learning_rate/full": 9.23666869969011e-05, "loss": 1.7429, "step": 48000, "training_step_in_ms": 1026.6044916820247 }, { "_do_backward_in_ms": 655.5070220401976, "_prepare_inputs_in_ms": 3.2323538628406823, "compute_loss_in_ms": 297.94803192745894, "epoch": 1.8, "learning_rate/full": 9.15108531521937e-05, "loss": 1.7414, "step": 49000, "training_step_in_ms": 962.1004992513917 }, { "_do_backward_in_ms": 672.1013942162972, "_prepare_inputs_in_ms": 3.2201343055348843, "compute_loss_in_ms": 298.3921029092744, "epoch": 1.84, "learning_rate/full": 9.061305292392976e-05, "loss": 1.7364, "step": 50000, "training_step_in_ms": 979.1467305382248 }, { "epoch": 1.84, "eval_objects365-local-v2-validation_loss": 2.164118766784668, "eval_objects365-local-v2-validation_runtime": 10.1507, "eval_objects365-local-v2-validation_samples_per_second": 78.812, "eval_objects365-local-v2-validation_steps_per_second": 1.281, "step": 50000 }, { "_do_backward_in_ms": 674.3136331241112, "_prepare_inputs_in_ms": 6.509397196586208, "compute_loss_in_ms": 298.9732424640313, "epoch": 1.87, "learning_rate/full": 8.967692735767203e-05, "loss": 1.7366, "step": 51000, "training_step_in_ms": 982.0552003385965 }, { "_do_backward_in_ms": 697.6172431716695, "_prepare_inputs_in_ms": 3.218969340668991, "compute_loss_in_ms": 298.4035959227476, "epoch": 1.91, "learning_rate/full": 8.86996523066913e-05, "loss": 1.735, "step": 52000, "training_step_in_ms": 1004.6795647891704 }, { "_do_backward_in_ms": 704.6787912775762, "_prepare_inputs_in_ms": 3.23072279850021, "compute_loss_in_ms": 298.14702042611316, "epoch": 1.95, "learning_rate/full": 8.768506659844343e-05, "loss": 1.7307, "step": 53000, "training_step_in_ms": 1011.5234109486919 }, { "_do_backward_in_ms": 769.2823166255839, "_prepare_inputs_in_ms": 3.204921918688342, "compute_loss_in_ms": 298.67282255436294, "epoch": 1.98, "learning_rate/full": 8.663321571775915e-05, "loss": 1.7305, "step": 54000, "training_step_in_ms": 1076.5878808272537 }, { "_do_backward_in_ms": 696.9331407416612, "_prepare_inputs_in_ms": 3.244183993898332, "compute_loss_in_ms": 298.70368046709336, "epoch": 2.02, "learning_rate/full": 8.554513979559709e-05, "loss": 1.7214, "step": 55000, "training_step_in_ms": 1004.3571789248381 }, { "epoch": 2.02, "eval_objects365-local-v2-validation_loss": 2.1291964054107666, "eval_objects365-local-v2-validation_runtime": 9.885, "eval_objects365-local-v2-validation_samples_per_second": 80.931, "eval_objects365-local-v2-validation_steps_per_second": 1.315, "step": 55000 }, { "_do_backward_in_ms": 653.2630679495633, "_prepare_inputs_in_ms": 6.026035463639699, "compute_loss_in_ms": 299.06775847170223, "epoch": 2.06, "learning_rate/full": 8.442077319354145e-05, "loss": 1.7136, "step": 56000, "training_step_in_ms": 961.0975561500527 }, { "_do_backward_in_ms": 745.036297386745, "_prepare_inputs_in_ms": 3.249741542385891, "compute_loss_in_ms": 297.8976241340861, "epoch": 2.09, "learning_rate/full": 8.326347629835318e-05, "loss": 1.7138, "step": 57000, "training_step_in_ms": 1051.66180648515 }, { "_do_backward_in_ms": 626.0586955258623, "_prepare_inputs_in_ms": 3.230042038485408, "compute_loss_in_ms": 298.4531378012616, "epoch": 2.13, "learning_rate/full": 8.207328655483055e-05, "loss": 1.7121, "step": 58000, "training_step_in_ms": 933.1826650444418 }, { "_do_backward_in_ms": 704.3030783196446, "_prepare_inputs_in_ms": 3.246616828488186, "compute_loss_in_ms": 297.92309659463353, "epoch": 2.17, "learning_rate/full": 8.085138089139716e-05, "loss": 1.712, "step": 59000, "training_step_in_ms": 1010.959731190931 }, { "_do_backward_in_ms": 695.9465066853445, "_prepare_inputs_in_ms": 3.237966085318476, "compute_loss_in_ms": 298.90500363637693, "epoch": 2.2, "learning_rate/full": 7.95989675990117e-05, "loss": 1.7112, "step": 60000, "training_step_in_ms": 1003.5448978319764 }, { "epoch": 2.2, "eval_objects365-local-v2-validation_loss": 2.1127543449401855, "eval_objects365-local-v2-validation_runtime": 8.4515, "eval_objects365-local-v2-validation_samples_per_second": 94.657, "eval_objects365-local-v2-validation_steps_per_second": 1.538, "step": 60000 }, { "_do_backward_in_ms": 716.5119342987891, "_prepare_inputs_in_ms": 5.995148892154933, "compute_loss_in_ms": 299.30640732260883, "epoch": 2.24, "learning_rate/full": 7.831598792818578e-05, "loss": 1.7064, "step": 61000, "training_step_in_ms": 1024.6346847999375 }, { "_do_backward_in_ms": 679.5819691745564, "_prepare_inputs_in_ms": 3.2997133519966155, "compute_loss_in_ms": 298.4492077725008, "epoch": 2.28, "learning_rate/full": 7.700627631147224e-05, "loss": 1.7071, "step": 62000, "training_step_in_ms": 986.8024183101952 }, { "_do_backward_in_ms": 696.5671245567501, "_prepare_inputs_in_ms": 3.2379625719040632, "compute_loss_in_ms": 298.1275162412785, "epoch": 2.31, "learning_rate/full": 7.566850863280712e-05, "loss": 1.7073, "step": 63000, "training_step_in_ms": 1003.3855868254323 }, { "_do_backward_in_ms": 788.7977120715659, "_prepare_inputs_in_ms": 3.261386409169063, "compute_loss_in_ms": 297.86948832380585, "epoch": 2.35, "learning_rate/full": 7.430805850264685e-05, "loss": 1.7044, "step": 64000, "training_step_in_ms": 1095.416541912593 }, { "_do_backward_in_ms": 679.518492219504, "_prepare_inputs_in_ms": 3.206374272936955, "compute_loss_in_ms": 298.958141958341, "epoch": 2.39, "learning_rate/full": 7.292361925349194e-05, "loss": 1.7045, "step": 65000, "training_step_in_ms": 987.1717654119711 }, { "epoch": 2.39, "eval_objects365-local-v2-validation_loss": 2.0909905433654785, "eval_objects365-local-v2-validation_runtime": 9.9304, "eval_objects365-local-v2-validation_samples_per_second": 80.561, "eval_objects365-local-v2-validation_steps_per_second": 1.309, "step": 65000 }, { "_do_backward_in_ms": 673.5720948528033, "_prepare_inputs_in_ms": 6.18732621827092, "compute_loss_in_ms": 297.60415547758134, "epoch": 2.42, "learning_rate/full": 7.151371560919644e-05, "loss": 1.702, "step": 66000, "training_step_in_ms": 980.0054668276571 }, { "_do_backward_in_ms": 654.3511845318135, "_prepare_inputs_in_ms": 3.2777874602470547, "compute_loss_in_ms": 298.2891470948234, "epoch": 2.46, "learning_rate/full": 7.008393674839574e-05, "loss": 1.7017, "step": 67000, "training_step_in_ms": 961.3662255166564 }, { "_do_backward_in_ms": 674.1237894105725, "_prepare_inputs_in_ms": 3.2963655965868384, "compute_loss_in_ms": 298.1315166691784, "epoch": 2.5, "learning_rate/full": 6.863429772988044e-05, "loss": 1.7001, "step": 68000, "training_step_in_ms": 980.9758842557203 }, { "_do_backward_in_ms": 651.5303289373405, "_prepare_inputs_in_ms": 3.2912338944151998, "compute_loss_in_ms": 297.70729713048786, "epoch": 2.53, "learning_rate/full": 6.716475375716726e-05, "loss": 1.6992, "step": 69000, "training_step_in_ms": 957.9476293225307 }, { "_do_backward_in_ms": 701.2599963427056, "_prepare_inputs_in_ms": 3.2801425319630653, "compute_loss_in_ms": 297.9513049093075, "epoch": 2.57, "learning_rate/full": 6.568119138852548e-05, "loss": 1.6991, "step": 70000, "training_step_in_ms": 1007.9343056466896 }, { "epoch": 2.57, "eval_objects365-local-v2-validation_loss": 2.0831971168518066, "eval_objects365-local-v2-validation_runtime": 13.482, "eval_objects365-local-v2-validation_samples_per_second": 59.338, "eval_objects365-local-v2-validation_steps_per_second": 0.964, "step": 70000 }, { "_do_backward_in_ms": 684.7173160158563, "_prepare_inputs_in_ms": 9.682496656596602, "compute_loss_in_ms": 298.04157538694375, "epoch": 2.61, "learning_rate/full": 6.41791349446638e-05, "loss": 1.6959, "step": 71000, "training_step_in_ms": 991.6520508083049 }, { "_do_backward_in_ms": 719.4974615401588, "_prepare_inputs_in_ms": 3.2725699762813747, "compute_loss_in_ms": 298.8009250371251, "epoch": 2.64, "learning_rate/full": 6.266455189473463e-05, "loss": 1.6968, "step": 72000, "training_step_in_ms": 1027.078579184832 }, { "_do_backward_in_ms": 630.4531340918038, "_prepare_inputs_in_ms": 3.2720213141292334, "compute_loss_in_ms": 298.3485946042929, "epoch": 2.68, "learning_rate/full": 6.11374454037508e-05, "loss": 1.6951, "step": 73000, "training_step_in_ms": 937.6128895445727 }, { "_do_backward_in_ms": 673.5870531778783, "_prepare_inputs_in_ms": 3.220968232722953, "compute_loss_in_ms": 298.4312469626311, "epoch": 2.72, "learning_rate/full": 5.9599325562893006e-05, "loss": 1.6948, "step": 74000, "training_step_in_ms": 980.7399763246067 }, { "_do_backward_in_ms": 670.7334653646685, "_prepare_inputs_in_ms": 3.225922678830102, "compute_loss_in_ms": 298.11172814434394, "epoch": 2.75, "learning_rate/full": 5.8050159947610774e-05, "loss": 1.6937, "step": 75000, "training_step_in_ms": 977.5889939961489 }, { "epoch": 2.75, "eval_objects365-local-v2-validation_loss": 2.060175895690918, "eval_objects365-local-v2-validation_runtime": 10.0939, "eval_objects365-local-v2-validation_samples_per_second": 79.255, "eval_objects365-local-v2-validation_steps_per_second": 1.288, "step": 75000 }, { "_do_backward_in_ms": 707.8241047970951, "_prepare_inputs_in_ms": 7.805172383402789, "compute_loss_in_ms": 298.5676535603343, "epoch": 2.79, "learning_rate/full": 5.649457853856564e-05, "loss": 1.692, "step": 76000, "training_step_in_ms": 1015.2529031389859 }, { "_do_backward_in_ms": 679.5182852572761, "_prepare_inputs_in_ms": 3.2479509462136775, "compute_loss_in_ms": 298.08279935712926, "epoch": 2.83, "learning_rate/full": 5.4932574914808247e-05, "loss": 1.6897, "step": 77000, "training_step_in_ms": 986.3572093644179 }, { "_do_backward_in_ms": 624.3718270168174, "_prepare_inputs_in_ms": 3.2478769938461483, "compute_loss_in_ms": 298.39406253327616, "epoch": 2.87, "learning_rate/full": 5.336569367582159e-05, "loss": 1.6903, "step": 78000, "training_step_in_ms": 931.5286971002351 }, { "_do_backward_in_ms": 699.6423877081834, "_prepare_inputs_in_ms": 3.2958002001978457, "compute_loss_in_ms": 298.47152298805304, "epoch": 2.9, "learning_rate/full": 5.179548424435664e-05, "loss": 1.6903, "step": 79000, "training_step_in_ms": 1006.8662925218232 }, { "_do_backward_in_ms": 721.8450685073622, "_prepare_inputs_in_ms": 3.2630610479973257, "compute_loss_in_ms": 298.69656310253777, "epoch": 2.94, "learning_rate/full": 5.0223499334273436e-05, "loss": 1.6861, "step": 80000, "training_step_in_ms": 1029.3176082074642 }, { "epoch": 2.94, "eval_objects365-local-v2-validation_loss": 2.0467376708984375, "eval_objects365-local-v2-validation_runtime": 12.3724, "eval_objects365-local-v2-validation_samples_per_second": 64.66, "eval_objects365-local-v2-validation_steps_per_second": 1.051, "step": 80000 }, { "_do_backward_in_ms": 745.3300231467001, "_prepare_inputs_in_ms": 8.417346246764874, "compute_loss_in_ms": 297.6633286998154, "epoch": 2.98, "learning_rate/full": 4.8651293415127954e-05, "loss": 1.6859, "step": 81000, "training_step_in_ms": 1051.9083714645822 }, { "_do_backward_in_ms": 713.2887438628823, "_prepare_inputs_in_ms": 3.309611749369651, "compute_loss_in_ms": 299.24693407770246, "epoch": 3.01, "learning_rate/full": 4.7080421175022657e-05, "loss": 1.6808, "step": 82000, "training_step_in_ms": 1021.3982793183532 }, { "_do_backward_in_ms": 683.561601414578, "_prepare_inputs_in_ms": 3.340075889835134, "compute_loss_in_ms": 299.84076196700335, "epoch": 3.05, "learning_rate/full": 4.551243598324116e-05, "loss": 1.6701, "step": 83000, "training_step_in_ms": 992.2671003735159 }, { "_do_backward_in_ms": 691.1516734741163, "_prepare_inputs_in_ms": 3.3659417459275573, "compute_loss_in_ms": 298.7288506310433, "epoch": 3.09, "learning_rate/full": 4.394888835418673e-05, "loss": 1.6682, "step": 84000, "training_step_in_ms": 998.8259057179093 }, { "_do_backward_in_ms": 671.9453688920476, "_prepare_inputs_in_ms": 3.3872235738672316, "compute_loss_in_ms": 299.7595350977499, "epoch": 3.12, "learning_rate/full": 4.238976880417727e-05, "loss": 1.6682, "step": 85000, "training_step_in_ms": 980.6325940783136 }, { "epoch": 3.12, "eval_objects365-local-v2-validation_loss": 2.037609815597534, "eval_objects365-local-v2-validation_runtime": 8.3572, "eval_objects365-local-v2-validation_samples_per_second": 95.726, "eval_objects365-local-v2-validation_steps_per_second": 1.556, "step": 85000 }, { "_do_backward_in_ms": 682.5501747601666, "_prepare_inputs_in_ms": 6.052993975997133, "compute_loss_in_ms": 298.15280754882235, "epoch": 3.16, "learning_rate/full": 4.083818976295859e-05, "loss": 1.6684, "step": 86000, "training_step_in_ms": 989.5919032730162 }, { "_do_backward_in_ms": 702.3105279654264, "_prepare_inputs_in_ms": 3.27472277241759, "compute_loss_in_ms": 298.05750323599204, "epoch": 3.2, "learning_rate/full": 3.929722605177466e-05, "loss": 1.6664, "step": 87000, "training_step_in_ms": 1009.1526904681232 }, { "_do_backward_in_ms": 647.5646377876401, "_prepare_inputs_in_ms": 3.2817639869172126, "compute_loss_in_ms": 299.0744258032646, "epoch": 3.23, "learning_rate/full": 3.776684586220099e-05, "loss": 1.6671, "step": 88000, "training_step_in_ms": 955.4352292607073 }, { "_do_backward_in_ms": 1546.6698298668489, "_prepare_inputs_in_ms": 3.2690847222693264, "compute_loss_in_ms": 298.1887877949048, "epoch": 3.27, "learning_rate/full": 3.6248562522640714e-05, "loss": 1.6665, "step": 89000, "training_step_in_ms": 1853.662267722888 }, { "_do_backward_in_ms": 1196.733351110248, "_prepare_inputs_in_ms": 3.2732989322394133, "compute_loss_in_ms": 298.05953590921126, "epoch": 3.31, "learning_rate/full": 3.4743877399432354e-05, "loss": 1.6673, "step": 90000, "training_step_in_ms": 1503.5614393500146 }, { "epoch": 3.31, "eval_objects365-local-v2-validation_loss": 2.015079975128174, "eval_objects365-local-v2-validation_runtime": 21.4395, "eval_objects365-local-v2-validation_samples_per_second": 37.314, "eval_objects365-local-v2-validation_steps_per_second": 0.606, "step": 90000 }, { "_do_backward_in_ms": 844.9975665507372, "_prepare_inputs_in_ms": 19.031029790805363, "compute_loss_in_ms": 298.18785645280065, "epoch": 3.34, "learning_rate/full": 3.325427841221202e-05, "loss": 1.6653, "step": 91000, "training_step_in_ms": 1152.0402662665583 }, { "_do_backward_in_ms": 1347.8918932015076, "_prepare_inputs_in_ms": 3.3262957674451172, "compute_loss_in_ms": 298.0921304386575, "epoch": 3.38, "learning_rate/full": 3.178123856257234e-05, "loss": 1.6655, "step": 92000, "training_step_in_ms": 1654.7804647334851 }, { "_do_backward_in_ms": 1063.0321979811415, "_prepare_inputs_in_ms": 3.2982348231598735, "compute_loss_in_ms": 298.2737292505335, "epoch": 3.42, "learning_rate/full": 3.0326214477473512e-05, "loss": 1.6623, "step": 93000, "training_step_in_ms": 1370.0481948212255 }, { "_do_backward_in_ms": 675.3600625551771, "_prepare_inputs_in_ms": 3.2861924229655415, "compute_loss_in_ms": 297.8690059813671, "epoch": 3.45, "learning_rate/full": 2.8890644968846193e-05, "loss": 1.6633, "step": 94000, "training_step_in_ms": 981.9639976194594 }, { "_do_backward_in_ms": 685.7196875785012, "_prepare_inputs_in_ms": 3.2590834801085293, "compute_loss_in_ms": 298.93962507206015, "epoch": 3.49, "learning_rate/full": 2.74745444265936e-05, "loss": 1.6637, "step": 95000, "training_step_in_ms": 993.3962683330756 }, { "epoch": 3.49, "eval_objects365-local-v2-validation_loss": 2.0223588943481445, "eval_objects365-local-v2-validation_runtime": 13.6125, "eval_objects365-local-v2-validation_samples_per_second": 58.769, "eval_objects365-local-v2-validation_steps_per_second": 0.955, "step": 95000 }, { "_do_backward_in_ms": 663.6111688169185, "_prepare_inputs_in_ms": 11.124133992699312, "compute_loss_in_ms": 298.8600466307988, "epoch": 3.53, "learning_rate/full": 2.608214514070504e-05, "loss": 1.6625, "step": 96000, "training_step_in_ms": 971.3277539745905 }, { "_do_backward_in_ms": 862.6617220155895, "_prepare_inputs_in_ms": 3.263724277028814, "compute_loss_in_ms": 298.15222160494886, "epoch": 3.56, "learning_rate/full": 2.471339721239901e-05, "loss": 1.6617, "step": 97000, "training_step_in_ms": 1169.5883399837185 }, { "_do_backward_in_ms": 792.2627358706668, "_prepare_inputs_in_ms": 3.239450325258076, "compute_loss_in_ms": 298.1890718040522, "epoch": 3.6, "learning_rate/full": 2.3370986279672524e-05, "loss": 1.6611, "step": 98000, "training_step_in_ms": 1099.1772187727038 }, { "_do_backward_in_ms": 821.3568878679071, "_prepare_inputs_in_ms": 3.2639728772919625, "compute_loss_in_ms": 298.9591815781314, "epoch": 3.64, "learning_rate/full": 2.2053549815720452e-05, "loss": 1.6597, "step": 99000, "training_step_in_ms": 1129.0956585616805 }, { "_do_backward_in_ms": 841.5605684022885, "_prepare_inputs_in_ms": 3.277285093674436, "compute_loss_in_ms": 298.11495484854095, "epoch": 3.67, "learning_rate/full": 2.0762471598573356e-05, "loss": 1.6591, "step": 100000, "training_step_in_ms": 1148.4819840774871 }, { "epoch": 3.67, "eval_objects365-local-v2-validation_loss": 2.003035545349121, "eval_objects365-local-v2-validation_runtime": 15.3967, "eval_objects365-local-v2-validation_samples_per_second": 51.959, "eval_objects365-local-v2-validation_steps_per_second": 0.844, "step": 100000 } ], "max_steps": 100000, "num_train_epochs": 4, "total_flos": 1.6535680982674692e+23, "trial_name": null, "trial_params": null }