diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,2023 +1,6685 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 3.0, - "global_step": 333, + "epoch": 10.0, + "global_step": 1110, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, - "learning_rate": 1.993993993993994e-05, + "learning_rate": 0.00019981981981981984, "loss": 9.3363, "step": 1 }, { "epoch": 0.02, - "learning_rate": 1.987987987987988e-05, - "loss": 6.3834, + "learning_rate": 0.00019963963963963967, + "loss": 5.0479, "step": 2 }, { "epoch": 0.03, - "learning_rate": 1.981981981981982e-05, - "loss": 5.1305, + "learning_rate": 0.00019945945945945947, + "loss": 4.3977, "step": 3 }, { "epoch": 0.04, - "learning_rate": 1.9759759759759763e-05, - "loss": 4.4537, + "learning_rate": 0.0001992792792792793, + "loss": 3.9062, "step": 4 }, { "epoch": 0.05, - "learning_rate": 1.9699699699699702e-05, - "loss": 4.0593, + "learning_rate": 0.00019909909909909912, + "loss": 3.6201, "step": 5 }, { "epoch": 0.05, - "learning_rate": 1.963963963963964e-05, - "loss": 3.8647, + "learning_rate": 0.00019891891891891895, + "loss": 3.2993, "step": 6 }, { "epoch": 0.06, - "learning_rate": 1.957957957957958e-05, - "loss": 3.5701, + "learning_rate": 0.00019873873873873873, + "loss": 2.7671, "step": 7 }, { "epoch": 0.07, - "learning_rate": 1.951951951951952e-05, - "loss": 3.594, + "learning_rate": 0.00019855855855855856, + "loss": 2.8142, "step": 8 }, { "epoch": 0.08, - "learning_rate": 1.9459459459459463e-05, - "loss": 3.5468, + "learning_rate": 0.00019837837837837838, + "loss": 2.6968, "step": 9 }, { "epoch": 0.09, - "learning_rate": 1.9399399399399402e-05, - "loss": 3.2149, + "learning_rate": 0.0001981981981981982, + "loss": 2.3751, "step": 10 }, { "epoch": 0.1, - "learning_rate": 1.9339339339339342e-05, - "loss": 3.2163, + "learning_rate": 0.000198018018018018, + "loss": 2.2539, "step": 11 }, { "epoch": 0.11, - "learning_rate": 1.927927927927928e-05, - "loss": 3.0711, + "learning_rate": 0.00019783783783783784, + "loss": 2.2155, "step": 12 }, { "epoch": 0.12, - "learning_rate": 1.921921921921922e-05, - "loss": 2.9672, + "learning_rate": 0.00019765765765765767, + "loss": 2.1593, "step": 13 }, { "epoch": 0.13, - "learning_rate": 1.915915915915916e-05, - "loss": 2.9493, + "learning_rate": 0.0001974774774774775, + "loss": 2.1809, "step": 14 }, { "epoch": 0.14, - "learning_rate": 1.90990990990991e-05, - "loss": 3.1754, + "learning_rate": 0.0001972972972972973, + "loss": 2.4524, "step": 15 }, { "epoch": 0.14, - "learning_rate": 1.903903903903904e-05, - "loss": 2.8729, + "learning_rate": 0.00019711711711711713, + "loss": 2.1628, "step": 16 }, { "epoch": 0.15, - "learning_rate": 1.8978978978978982e-05, - "loss": 2.8177, + "learning_rate": 0.00019693693693693696, + "loss": 2.1451, "step": 17 }, { "epoch": 0.16, - "learning_rate": 1.891891891891892e-05, - "loss": 2.7751, + "learning_rate": 0.00019675675675675678, + "loss": 2.1149, "step": 18 }, { "epoch": 0.17, - "learning_rate": 1.885885885885886e-05, - "loss": 2.7206, + "learning_rate": 0.00019657657657657659, + "loss": 1.8829, "step": 19 }, { "epoch": 0.18, - "learning_rate": 1.87987987987988e-05, - "loss": 2.6308, + "learning_rate": 0.0001963963963963964, + "loss": 1.752, "step": 20 }, { "epoch": 0.19, - "learning_rate": 1.873873873873874e-05, - "loss": 2.6611, + "learning_rate": 0.00019621621621621622, + "loss": 1.9402, "step": 21 }, { "epoch": 0.2, - "learning_rate": 1.8678678678678682e-05, - "loss": 2.6, + "learning_rate": 0.00019603603603603604, + "loss": 1.915, "step": 22 }, { "epoch": 0.21, - "learning_rate": 1.861861861861862e-05, - "loss": 2.3343, + "learning_rate": 0.00019585585585585587, + "loss": 1.5993, "step": 23 }, { "epoch": 0.22, - "learning_rate": 1.855855855855856e-05, - "loss": 2.5656, + "learning_rate": 0.00019567567567567567, + "loss": 1.8401, "step": 24 }, { "epoch": 0.23, - "learning_rate": 1.84984984984985e-05, - "loss": 2.6288, + "learning_rate": 0.0001954954954954955, + "loss": 2.0089, "step": 25 }, { "epoch": 0.23, - "learning_rate": 1.843843843843844e-05, - "loss": 2.6641, + "learning_rate": 0.00019531531531531533, + "loss": 1.9848, "step": 26 }, { "epoch": 0.24, - "learning_rate": 1.8378378378378383e-05, - "loss": 2.2362, + "learning_rate": 0.00019513513513513516, + "loss": 1.6511, "step": 27 }, { "epoch": 0.25, - "learning_rate": 1.831831831831832e-05, - "loss": 2.2673, + "learning_rate": 0.00019495495495495496, + "loss": 1.6407, "step": 28 }, { "epoch": 0.26, - "learning_rate": 1.8258258258258258e-05, - "loss": 2.5034, + "learning_rate": 0.0001947747747747748, + "loss": 1.8594, "step": 29 }, { "epoch": 0.27, - "learning_rate": 1.81981981981982e-05, - "loss": 2.4348, + "learning_rate": 0.00019459459459459462, + "loss": 1.8043, "step": 30 }, { "epoch": 0.28, - "learning_rate": 1.813813813813814e-05, - "loss": 2.3081, + "learning_rate": 0.00019441441441441442, + "loss": 1.6376, "step": 31 }, { "epoch": 0.29, - "learning_rate": 1.807807807807808e-05, - "loss": 2.2955, + "learning_rate": 0.00019423423423423425, + "loss": 1.6761, "step": 32 }, { "epoch": 0.3, - "learning_rate": 1.801801801801802e-05, - "loss": 2.2767, + "learning_rate": 0.00019405405405405405, + "loss": 1.5476, "step": 33 }, { "epoch": 0.31, - "learning_rate": 1.795795795795796e-05, - "loss": 2.3092, + "learning_rate": 0.00019387387387387388, + "loss": 1.6939, "step": 34 }, { "epoch": 0.32, - "learning_rate": 1.78978978978979e-05, - "loss": 2.3261, + "learning_rate": 0.0001936936936936937, + "loss": 1.7951, "step": 35 }, { "epoch": 0.32, - "learning_rate": 1.783783783783784e-05, - "loss": 2.4289, + "learning_rate": 0.00019351351351351353, + "loss": 1.8343, "step": 36 }, { "epoch": 0.33, - "learning_rate": 1.7777777777777777e-05, - "loss": 2.4217, + "learning_rate": 0.00019333333333333333, + "loss": 1.8248, "step": 37 }, { "epoch": 0.34, - "learning_rate": 1.771771771771772e-05, - "loss": 2.3543, + "learning_rate": 0.00019315315315315316, + "loss": 1.7202, "step": 38 }, { "epoch": 0.35, - "learning_rate": 1.765765765765766e-05, - "loss": 2.2967, + "learning_rate": 0.000192972972972973, + "loss": 1.7938, "step": 39 }, { "epoch": 0.36, - "learning_rate": 1.7597597597597598e-05, - "loss": 2.3744, + "learning_rate": 0.00019279279279279282, + "loss": 1.7348, "step": 40 }, { "epoch": 0.37, - "learning_rate": 1.7537537537537538e-05, - "loss": 2.2404, + "learning_rate": 0.00019261261261261262, + "loss": 1.6271, "step": 41 }, { "epoch": 0.38, - "learning_rate": 1.7477477477477477e-05, - "loss": 2.0521, + "learning_rate": 0.00019243243243243245, + "loss": 1.5305, "step": 42 }, { "epoch": 0.39, - "learning_rate": 1.741741741741742e-05, - "loss": 2.1582, + "learning_rate": 0.00019225225225225225, + "loss": 1.6801, "step": 43 }, { "epoch": 0.4, - "learning_rate": 1.735735735735736e-05, - "loss": 2.2699, + "learning_rate": 0.00019207207207207208, + "loss": 1.6942, "step": 44 }, { "epoch": 0.41, - "learning_rate": 1.72972972972973e-05, - "loss": 2.1781, + "learning_rate": 0.0001918918918918919, + "loss": 1.7218, "step": 45 }, { "epoch": 0.41, - "learning_rate": 1.7237237237237238e-05, - "loss": 2.2529, + "learning_rate": 0.0001917117117117117, + "loss": 1.6827, "step": 46 }, { "epoch": 0.42, - "learning_rate": 1.7177177177177177e-05, - "loss": 2.0092, + "learning_rate": 0.00019153153153153154, + "loss": 1.4275, "step": 47 }, { "epoch": 0.43, - "learning_rate": 1.711711711711712e-05, - "loss": 2.2386, + "learning_rate": 0.00019135135135135137, + "loss": 1.605, "step": 48 }, { "epoch": 0.44, - "learning_rate": 1.705705705705706e-05, - "loss": 2.1246, + "learning_rate": 0.0001911711711711712, + "loss": 1.6862, "step": 49 }, { "epoch": 0.45, - "learning_rate": 1.6996996996997e-05, - "loss": 2.0728, + "learning_rate": 0.000190990990990991, + "loss": 1.529, "step": 50 }, { "epoch": 0.46, - "learning_rate": 1.693693693693694e-05, - "loss": 2.1956, + "learning_rate": 0.00019081081081081082, + "loss": 1.6986, "step": 51 }, { "epoch": 0.47, - "learning_rate": 1.6876876876876878e-05, - "loss": 2.1301, + "learning_rate": 0.00019063063063063065, + "loss": 1.6081, "step": 52 }, { "epoch": 0.48, - "learning_rate": 1.6816816816816817e-05, - "loss": 2.2538, + "learning_rate": 0.00019045045045045048, + "loss": 1.6748, "step": 53 }, { "epoch": 0.49, - "learning_rate": 1.6756756756756757e-05, - "loss": 2.0491, + "learning_rate": 0.00019027027027027028, + "loss": 1.3963, "step": 54 }, { "epoch": 0.5, - "learning_rate": 1.6696696696696696e-05, - "loss": 2.143, + "learning_rate": 0.00019009009009009008, + "loss": 1.6614, "step": 55 }, { "epoch": 0.5, - "learning_rate": 1.663663663663664e-05, - "loss": 2.0769, + "learning_rate": 0.0001899099099099099, + "loss": 1.4796, "step": 56 }, { "epoch": 0.51, - "learning_rate": 1.6576576576576578e-05, - "loss": 2.0398, + "learning_rate": 0.00018972972972972974, + "loss": 1.4965, "step": 57 }, { "epoch": 0.52, - "learning_rate": 1.6516516516516518e-05, - "loss": 2.1762, + "learning_rate": 0.00018954954954954957, + "loss": 1.5498, "step": 58 }, { "epoch": 0.53, - "learning_rate": 1.6456456456456457e-05, - "loss": 2.1344, + "learning_rate": 0.00018936936936936937, + "loss": 1.5678, "step": 59 }, { "epoch": 0.54, - "learning_rate": 1.6396396396396396e-05, - "loss": 1.8858, + "learning_rate": 0.0001891891891891892, + "loss": 1.4824, "step": 60 }, { "epoch": 0.55, - "learning_rate": 1.633633633633634e-05, - "loss": 2.0582, + "learning_rate": 0.00018900900900900903, + "loss": 1.4765, "step": 61 }, { "epoch": 0.56, - "learning_rate": 1.627627627627628e-05, - "loss": 2.0495, + "learning_rate": 0.00018882882882882885, + "loss": 1.5106, "step": 62 }, { "epoch": 0.57, - "learning_rate": 1.6216216216216218e-05, - "loss": 2.0432, + "learning_rate": 0.00018864864864864866, + "loss": 1.5217, "step": 63 }, { "epoch": 0.58, - "learning_rate": 1.6156156156156157e-05, - "loss": 2.0205, + "learning_rate": 0.00018846846846846848, + "loss": 1.5239, "step": 64 }, { "epoch": 0.59, - "learning_rate": 1.6096096096096097e-05, - "loss": 2.0229, + "learning_rate": 0.0001882882882882883, + "loss": 1.5392, "step": 65 }, { "epoch": 0.59, - "learning_rate": 1.6036036036036036e-05, - "loss": 1.9601, + "learning_rate": 0.00018810810810810814, + "loss": 1.4498, "step": 66 }, { "epoch": 0.6, - "learning_rate": 1.5975975975975976e-05, - "loss": 2.0695, + "learning_rate": 0.00018792792792792791, + "loss": 1.4997, "step": 67 }, { "epoch": 0.61, - "learning_rate": 1.591591591591592e-05, - "loss": 1.9346, + "learning_rate": 0.00018774774774774774, + "loss": 1.4124, "step": 68 }, { "epoch": 0.62, - "learning_rate": 1.5855855855855858e-05, - "loss": 2.11, + "learning_rate": 0.00018756756756756757, + "loss": 1.5549, "step": 69 }, { "epoch": 0.63, - "learning_rate": 1.5795795795795797e-05, - "loss": 2.0759, + "learning_rate": 0.0001873873873873874, + "loss": 1.431, "step": 70 }, { "epoch": 0.64, - "learning_rate": 1.5735735735735737e-05, - "loss": 1.9352, + "learning_rate": 0.0001872072072072072, + "loss": 1.3728, "step": 71 }, { "epoch": 0.65, - "learning_rate": 1.5675675675675676e-05, - "loss": 2.1723, + "learning_rate": 0.00018702702702702703, + "loss": 1.5376, "step": 72 }, { "epoch": 0.66, - "learning_rate": 1.5615615615615616e-05, - "loss": 2.0719, + "learning_rate": 0.00018684684684684686, + "loss": 1.6378, "step": 73 }, { "epoch": 0.67, - "learning_rate": 1.555555555555556e-05, - "loss": 1.9658, + "learning_rate": 0.0001866666666666667, + "loss": 1.4646, "step": 74 }, { "epoch": 0.68, - "learning_rate": 1.5495495495495498e-05, - "loss": 1.7428, + "learning_rate": 0.0001864864864864865, + "loss": 1.2362, "step": 75 }, { "epoch": 0.68, - "learning_rate": 1.5435435435435437e-05, - "loss": 1.8585, + "learning_rate": 0.00018630630630630632, + "loss": 1.3428, "step": 76 }, { "epoch": 0.69, - "learning_rate": 1.5375375375375377e-05, - "loss": 1.9221, + "learning_rate": 0.00018612612612612614, + "loss": 1.4464, "step": 77 }, { "epoch": 0.7, - "learning_rate": 1.5315315315315316e-05, - "loss": 1.9126, + "learning_rate": 0.00018594594594594597, + "loss": 1.4192, "step": 78 }, { "epoch": 0.71, - "learning_rate": 1.5255255255255257e-05, - "loss": 1.8657, + "learning_rate": 0.00018576576576576577, + "loss": 1.4174, "step": 79 }, { "epoch": 0.72, - "learning_rate": 1.5195195195195196e-05, - "loss": 1.9183, + "learning_rate": 0.00018558558558558558, + "loss": 1.5149, "step": 80 }, { "epoch": 0.73, - "learning_rate": 1.5135135135135138e-05, - "loss": 1.983, + "learning_rate": 0.0001854054054054054, + "loss": 1.504, "step": 81 }, { "epoch": 0.74, - "learning_rate": 1.5075075075075077e-05, - "loss": 1.9661, + "learning_rate": 0.00018522522522522523, + "loss": 1.4452, "step": 82 }, { "epoch": 0.75, - "learning_rate": 1.5015015015015015e-05, - "loss": 1.9937, + "learning_rate": 0.00018504504504504506, + "loss": 1.5763, "step": 83 }, { "epoch": 0.76, - "learning_rate": 1.4954954954954957e-05, - "loss": 1.8473, + "learning_rate": 0.00018486486486486486, + "loss": 1.4288, "step": 84 }, { "epoch": 0.77, - "learning_rate": 1.4894894894894895e-05, - "loss": 1.9417, + "learning_rate": 0.0001846846846846847, + "loss": 1.5056, "step": 85 }, { "epoch": 0.77, - "learning_rate": 1.4834834834834836e-05, - "loss": 1.8969, + "learning_rate": 0.00018450450450450452, + "loss": 1.3584, "step": 86 }, { "epoch": 0.78, - "learning_rate": 1.4774774774774776e-05, - "loss": 1.8054, + "learning_rate": 0.00018432432432432435, + "loss": 1.2679, "step": 87 }, { "epoch": 0.79, - "learning_rate": 1.4714714714714715e-05, - "loss": 1.7336, + "learning_rate": 0.00018414414414414415, + "loss": 1.2421, "step": 88 }, { "epoch": 0.8, - "learning_rate": 1.4654654654654656e-05, - "loss": 1.8264, + "learning_rate": 0.00018396396396396398, + "loss": 1.2359, "step": 89 }, { "epoch": 0.81, - "learning_rate": 1.4594594594594596e-05, - "loss": 1.8013, + "learning_rate": 0.0001837837837837838, + "loss": 1.2961, "step": 90 }, { "epoch": 0.82, - "learning_rate": 1.4534534534534537e-05, - "loss": 1.9437, + "learning_rate": 0.0001836036036036036, + "loss": 1.5433, "step": 91 }, { "epoch": 0.83, - "learning_rate": 1.4474474474474476e-05, - "loss": 1.893, + "learning_rate": 0.00018342342342342343, + "loss": 1.3349, "step": 92 }, { "epoch": 0.84, - "learning_rate": 1.4414414414414416e-05, - "loss": 1.83, + "learning_rate": 0.00018324324324324324, + "loss": 1.3233, "step": 93 }, { "epoch": 0.85, - "learning_rate": 1.4354354354354357e-05, - "loss": 1.966, + "learning_rate": 0.00018306306306306306, + "loss": 1.4935, "step": 94 }, { "epoch": 0.86, - "learning_rate": 1.4294294294294296e-05, - "loss": 1.9087, + "learning_rate": 0.0001828828828828829, + "loss": 1.4808, "step": 95 }, { "epoch": 0.86, - "learning_rate": 1.4234234234234234e-05, - "loss": 1.8374, + "learning_rate": 0.00018270270270270272, + "loss": 1.3105, "step": 96 }, { "epoch": 0.87, - "learning_rate": 1.4174174174174175e-05, - "loss": 2.1122, + "learning_rate": 0.00018252252252252252, + "loss": 1.6545, "step": 97 }, { "epoch": 0.88, - "learning_rate": 1.4114114114114114e-05, - "loss": 2.0771, + "learning_rate": 0.00018234234234234235, + "loss": 1.5198, "step": 98 }, { "epoch": 0.89, - "learning_rate": 1.4054054054054055e-05, - "loss": 1.8904, + "learning_rate": 0.00018216216216216218, + "loss": 1.2939, "step": 99 }, { "epoch": 0.9, - "learning_rate": 1.3993993993993995e-05, - "loss": 1.9853, + "learning_rate": 0.000181981981981982, + "loss": 1.4136, "step": 100 }, { "epoch": 0.91, - "learning_rate": 1.3933933933933934e-05, - "loss": 1.8186, + "learning_rate": 0.0001818018018018018, + "loss": 1.2831, "step": 101 }, { "epoch": 0.92, - "learning_rate": 1.3873873873873875e-05, - "loss": 1.8105, + "learning_rate": 0.00018162162162162164, + "loss": 1.2908, "step": 102 }, { "epoch": 0.93, - "learning_rate": 1.3813813813813815e-05, - "loss": 1.6838, + "learning_rate": 0.00018144144144144144, + "loss": 1.2553, "step": 103 }, { "epoch": 0.94, - "learning_rate": 1.3753753753753756e-05, - "loss": 1.7476, + "learning_rate": 0.00018126126126126127, + "loss": 1.2482, "step": 104 }, { "epoch": 0.95, - "learning_rate": 1.3693693693693695e-05, - "loss": 1.8371, + "learning_rate": 0.0001810810810810811, + "loss": 1.3024, "step": 105 }, { "epoch": 0.95, - "learning_rate": 1.3633633633633635e-05, - "loss": 1.8751, + "learning_rate": 0.0001809009009009009, + "loss": 1.3831, "step": 106 }, { "epoch": 0.96, - "learning_rate": 1.3573573573573576e-05, - "loss": 1.7548, + "learning_rate": 0.00018072072072072072, + "loss": 1.2204, "step": 107 }, { "epoch": 0.97, - "learning_rate": 1.3513513513513515e-05, - "loss": 1.6747, + "learning_rate": 0.00018054054054054055, + "loss": 1.193, "step": 108 }, { "epoch": 0.98, - "learning_rate": 1.3453453453453456e-05, - "loss": 1.7632, + "learning_rate": 0.00018036036036036038, + "loss": 1.2178, "step": 109 }, { "epoch": 0.99, - "learning_rate": 1.3393393393393394e-05, - "loss": 1.8086, + "learning_rate": 0.00018018018018018018, + "loss": 1.3477, "step": 110 }, { "epoch": 1.0, - "learning_rate": 1.3333333333333333e-05, - "loss": 2.2582, + "learning_rate": 0.00018, + "loss": 1.848, "step": 111 }, { "epoch": 1.01, - "learning_rate": 1.3273273273273274e-05, - "loss": 1.6199, + "learning_rate": 0.00017981981981981984, + "loss": 1.0224, "step": 112 }, { "epoch": 1.02, - "learning_rate": 1.3213213213213214e-05, - "loss": 1.6633, + "learning_rate": 0.00017963963963963967, + "loss": 1.0316, "step": 113 }, { "epoch": 1.03, - "learning_rate": 1.3153153153153155e-05, - "loss": 1.6855, + "learning_rate": 0.00017945945945945947, + "loss": 0.9298, "step": 114 }, { "epoch": 1.04, - "learning_rate": 1.3093093093093094e-05, - "loss": 1.7482, + "learning_rate": 0.00017927927927927927, + "loss": 0.9607, "step": 115 }, { "epoch": 1.05, - "learning_rate": 1.3033033033033034e-05, - "loss": 1.6648, + "learning_rate": 0.0001790990990990991, + "loss": 0.9851, "step": 116 }, { "epoch": 1.05, - "learning_rate": 1.2972972972972975e-05, - "loss": 1.7411, + "learning_rate": 0.00017891891891891893, + "loss": 1.1184, "step": 117 }, { "epoch": 1.06, - "learning_rate": 1.2912912912912914e-05, - "loss": 1.7804, + "learning_rate": 0.00017873873873873876, + "loss": 1.1724, "step": 118 }, { "epoch": 1.07, - "learning_rate": 1.2852852852852854e-05, - "loss": 1.6331, + "learning_rate": 0.00017855855855855856, + "loss": 0.9032, "step": 119 }, { "epoch": 1.08, - "learning_rate": 1.2792792792792795e-05, - "loss": 1.7026, + "learning_rate": 0.00017837837837837839, + "loss": 1.0655, "step": 120 }, { "epoch": 1.09, - "learning_rate": 1.2732732732732732e-05, - "loss": 1.5627, + "learning_rate": 0.00017819819819819821, + "loss": 0.9477, "step": 121 }, { "epoch": 1.1, - "learning_rate": 1.2672672672672675e-05, - "loss": 1.7268, + "learning_rate": 0.00017801801801801804, + "loss": 1.0931, "step": 122 }, { "epoch": 1.11, - "learning_rate": 1.2612612612612613e-05, - "loss": 1.7191, + "learning_rate": 0.00017783783783783784, + "loss": 1.1222, "step": 123 }, { "epoch": 1.12, - "learning_rate": 1.2552552552552552e-05, - "loss": 1.7727, + "learning_rate": 0.00017765765765765767, + "loss": 1.0831, "step": 124 }, { "epoch": 1.13, - "learning_rate": 1.2492492492492493e-05, - "loss": 1.6784, + "learning_rate": 0.0001774774774774775, + "loss": 0.996, "step": 125 }, { "epoch": 1.14, - "learning_rate": 1.2432432432432433e-05, - "loss": 1.7173, + "learning_rate": 0.00017729729729729733, + "loss": 1.1083, "step": 126 }, { "epoch": 1.14, - "learning_rate": 1.2372372372372374e-05, - "loss": 1.7755, + "learning_rate": 0.0001771171171171171, + "loss": 1.0783, "step": 127 }, { "epoch": 1.15, - "learning_rate": 1.2312312312312313e-05, - "loss": 1.5832, + "learning_rate": 0.00017693693693693693, + "loss": 1.0126, "step": 128 }, { "epoch": 1.16, - "learning_rate": 1.2252252252252253e-05, - "loss": 1.6627, + "learning_rate": 0.00017675675675675676, + "loss": 1.0566, "step": 129 }, { "epoch": 1.17, - "learning_rate": 1.2192192192192194e-05, - "loss": 1.7832, + "learning_rate": 0.0001765765765765766, + "loss": 1.1383, "step": 130 }, { "epoch": 1.18, - "learning_rate": 1.2132132132132133e-05, - "loss": 1.6711, + "learning_rate": 0.0001763963963963964, + "loss": 1.0675, "step": 131 }, { "epoch": 1.19, - "learning_rate": 1.2072072072072074e-05, - "loss": 1.6217, + "learning_rate": 0.00017621621621621622, + "loss": 0.9634, "step": 132 }, { "epoch": 1.2, - "learning_rate": 1.2012012012012014e-05, - "loss": 1.6746, + "learning_rate": 0.00017603603603603605, + "loss": 0.9781, "step": 133 }, { "epoch": 1.21, - "learning_rate": 1.1951951951951951e-05, - "loss": 1.602, + "learning_rate": 0.00017585585585585587, + "loss": 1.0291, "step": 134 }, { "epoch": 1.22, - "learning_rate": 1.1891891891891894e-05, - "loss": 1.7648, + "learning_rate": 0.00017567567567567568, + "loss": 1.0779, "step": 135 }, { "epoch": 1.23, - "learning_rate": 1.1831831831831832e-05, - "loss": 1.5384, + "learning_rate": 0.0001754954954954955, + "loss": 0.946, "step": 136 }, { "epoch": 1.23, - "learning_rate": 1.1771771771771771e-05, - "loss": 1.6899, + "learning_rate": 0.00017531531531531533, + "loss": 1.1033, "step": 137 }, { "epoch": 1.24, - "learning_rate": 1.1711711711711713e-05, - "loss": 1.7466, + "learning_rate": 0.00017513513513513516, + "loss": 1.1328, "step": 138 }, { "epoch": 1.25, - "learning_rate": 1.1651651651651652e-05, - "loss": 1.5775, + "learning_rate": 0.00017495495495495496, + "loss": 1.014, "step": 139 }, { "epoch": 1.26, - "learning_rate": 1.1591591591591593e-05, - "loss": 1.5667, + "learning_rate": 0.00017477477477477476, + "loss": 1.0098, "step": 140 }, { "epoch": 1.27, - "learning_rate": 1.1531531531531532e-05, - "loss": 1.6807, + "learning_rate": 0.0001745945945945946, + "loss": 1.0973, "step": 141 }, { "epoch": 1.28, - "learning_rate": 1.1471471471471472e-05, - "loss": 1.7766, + "learning_rate": 0.00017441441441441442, + "loss": 1.0991, "step": 142 }, { "epoch": 1.29, - "learning_rate": 1.1411411411411413e-05, - "loss": 1.7, + "learning_rate": 0.00017423423423423425, + "loss": 0.9668, "step": 143 }, { "epoch": 1.3, - "learning_rate": 1.1351351351351352e-05, - "loss": 1.5558, + "learning_rate": 0.00017405405405405405, + "loss": 0.8774, "step": 144 }, { "epoch": 1.31, - "learning_rate": 1.1291291291291293e-05, - "loss": 1.7148, + "learning_rate": 0.00017387387387387388, + "loss": 1.0744, "step": 145 }, { "epoch": 1.32, - "learning_rate": 1.1231231231231233e-05, - "loss": 1.5432, + "learning_rate": 0.0001736936936936937, + "loss": 0.8432, "step": 146 }, { "epoch": 1.32, - "learning_rate": 1.117117117117117e-05, - "loss": 1.5042, + "learning_rate": 0.00017351351351351353, + "loss": 0.9951, "step": 147 }, { "epoch": 1.33, - "learning_rate": 1.1111111111111113e-05, - "loss": 1.8052, + "learning_rate": 0.00017333333333333334, + "loss": 1.0802, "step": 148 }, { "epoch": 1.34, - "learning_rate": 1.1051051051051051e-05, - "loss": 1.8016, + "learning_rate": 0.00017315315315315316, + "loss": 1.1801, "step": 149 }, { "epoch": 1.35, - "learning_rate": 1.0990990990990992e-05, - "loss": 1.4395, + "learning_rate": 0.000172972972972973, + "loss": 0.9059, "step": 150 }, { "epoch": 1.36, - "learning_rate": 1.0930930930930932e-05, - "loss": 1.6007, + "learning_rate": 0.0001727927927927928, + "loss": 0.9663, "step": 151 }, { "epoch": 1.37, - "learning_rate": 1.0870870870870871e-05, - "loss": 1.6131, + "learning_rate": 0.00017261261261261262, + "loss": 1.1322, "step": 152 }, { "epoch": 1.38, - "learning_rate": 1.0810810810810812e-05, - "loss": 1.612, + "learning_rate": 0.00017243243243243242, + "loss": 1.082, "step": 153 }, { "epoch": 1.39, - "learning_rate": 1.0750750750750751e-05, - "loss": 1.5766, + "learning_rate": 0.00017225225225225225, + "loss": 0.9189, "step": 154 }, { "epoch": 1.4, - "learning_rate": 1.0690690690690693e-05, - "loss": 1.577, + "learning_rate": 0.00017207207207207208, + "loss": 1.0324, "step": 155 }, { "epoch": 1.41, - "learning_rate": 1.0630630630630632e-05, - "loss": 1.6121, + "learning_rate": 0.0001718918918918919, + "loss": 0.9904, "step": 156 }, { "epoch": 1.41, - "learning_rate": 1.0570570570570571e-05, - "loss": 1.7291, + "learning_rate": 0.0001717117117117117, + "loss": 1.1135, "step": 157 }, { "epoch": 1.42, - "learning_rate": 1.0510510510510512e-05, - "loss": 1.5699, + "learning_rate": 0.00017153153153153154, + "loss": 0.9649, "step": 158 }, { "epoch": 1.43, - "learning_rate": 1.0450450450450452e-05, - "loss": 1.564, + "learning_rate": 0.00017135135135135137, + "loss": 1.0241, "step": 159 }, { "epoch": 1.44, - "learning_rate": 1.039039039039039e-05, - "loss": 1.5114, + "learning_rate": 0.0001711711711711712, + "loss": 0.8952, "step": 160 }, { "epoch": 1.45, - "learning_rate": 1.033033033033033e-05, - "loss": 1.5204, + "learning_rate": 0.000170990990990991, + "loss": 0.9294, "step": 161 }, { "epoch": 1.46, - "learning_rate": 1.027027027027027e-05, - "loss": 1.5483, + "learning_rate": 0.00017081081081081083, + "loss": 1.0734, "step": 162 }, { "epoch": 1.47, - "learning_rate": 1.0210210210210211e-05, - "loss": 1.7533, + "learning_rate": 0.00017063063063063063, + "loss": 1.075, "step": 163 }, { "epoch": 1.48, - "learning_rate": 1.015015015015015e-05, - "loss": 1.7088, + "learning_rate": 0.00017045045045045045, + "loss": 0.9994, "step": 164 }, { "epoch": 1.49, - "learning_rate": 1.009009009009009e-05, - "loss": 1.3951, + "learning_rate": 0.00017027027027027028, + "loss": 0.8595, "step": 165 }, { "epoch": 1.5, - "learning_rate": 1.0030030030030031e-05, - "loss": 1.533, + "learning_rate": 0.00017009009009009008, + "loss": 0.9343, "step": 166 }, { "epoch": 1.5, - "learning_rate": 9.96996996996997e-06, - "loss": 1.6597, + "learning_rate": 0.0001699099099099099, + "loss": 1.0496, "step": 167 }, { "epoch": 1.51, - "learning_rate": 9.90990990990991e-06, - "loss": 1.6057, + "learning_rate": 0.00016972972972972974, + "loss": 0.9934, "step": 168 }, { "epoch": 1.52, - "learning_rate": 9.849849849849851e-06, - "loss": 1.6473, + "learning_rate": 0.00016954954954954957, + "loss": 0.9766, "step": 169 }, { "epoch": 1.53, - "learning_rate": 9.78978978978979e-06, - "loss": 1.3704, + "learning_rate": 0.00016936936936936937, + "loss": 0.8376, "step": 170 }, { "epoch": 1.54, - "learning_rate": 9.729729729729732e-06, - "loss": 1.5277, + "learning_rate": 0.0001691891891891892, + "loss": 0.9374, "step": 171 }, { "epoch": 1.55, - "learning_rate": 9.669669669669671e-06, - "loss": 1.4736, + "learning_rate": 0.00016900900900900903, + "loss": 1.0476, "step": 172 }, { "epoch": 1.56, - "learning_rate": 9.60960960960961e-06, - "loss": 1.5006, + "learning_rate": 0.00016882882882882886, + "loss": 0.9219, "step": 173 }, { "epoch": 1.57, - "learning_rate": 9.54954954954955e-06, - "loss": 1.4437, + "learning_rate": 0.00016864864864864866, + "loss": 0.9586, "step": 174 }, { "epoch": 1.58, - "learning_rate": 9.489489489489491e-06, - "loss": 1.553, + "learning_rate": 0.00016846846846846846, + "loss": 1.0288, "step": 175 }, { "epoch": 1.59, - "learning_rate": 9.42942942942943e-06, - "loss": 1.5653, + "learning_rate": 0.0001682882882882883, + "loss": 0.9829, "step": 176 }, { "epoch": 1.59, - "learning_rate": 9.36936936936937e-06, - "loss": 1.4045, + "learning_rate": 0.00016810810810810812, + "loss": 0.8158, "step": 177 }, { "epoch": 1.6, - "learning_rate": 9.30930930930931e-06, - "loss": 1.6598, + "learning_rate": 0.00016792792792792794, + "loss": 1.0338, "step": 178 }, { "epoch": 1.61, - "learning_rate": 9.24924924924925e-06, - "loss": 1.4961, + "learning_rate": 0.00016774774774774775, + "loss": 1.0119, "step": 179 }, { "epoch": 1.62, - "learning_rate": 9.189189189189191e-06, - "loss": 1.4487, + "learning_rate": 0.00016756756756756757, + "loss": 0.7975, "step": 180 }, { "epoch": 1.63, - "learning_rate": 9.129129129129129e-06, - "loss": 1.4329, + "learning_rate": 0.0001673873873873874, + "loss": 0.8601, "step": 181 }, { "epoch": 1.64, - "learning_rate": 9.06906906906907e-06, - "loss": 1.5354, + "learning_rate": 0.00016720720720720723, + "loss": 1.0333, "step": 182 }, { "epoch": 1.65, - "learning_rate": 9.00900900900901e-06, - "loss": 1.7155, + "learning_rate": 0.00016702702702702703, + "loss": 1.0862, "step": 183 }, { "epoch": 1.66, - "learning_rate": 8.94894894894895e-06, - "loss": 1.6296, + "learning_rate": 0.00016684684684684686, + "loss": 0.8784, "step": 184 }, { "epoch": 1.67, - "learning_rate": 8.888888888888888e-06, - "loss": 1.5776, + "learning_rate": 0.0001666666666666667, + "loss": 1.0995, "step": 185 }, { "epoch": 1.68, - "learning_rate": 8.82882882882883e-06, - "loss": 1.5153, + "learning_rate": 0.00016648648648648652, + "loss": 1.0452, "step": 186 }, { "epoch": 1.68, - "learning_rate": 8.768768768768769e-06, - "loss": 1.353, + "learning_rate": 0.00016630630630630632, + "loss": 0.8971, "step": 187 }, { "epoch": 1.69, - "learning_rate": 8.70870870870871e-06, - "loss": 1.5484, + "learning_rate": 0.00016612612612612612, + "loss": 0.9339, "step": 188 }, { "epoch": 1.7, - "learning_rate": 8.64864864864865e-06, - "loss": 1.3934, + "learning_rate": 0.00016594594594594595, + "loss": 0.8219, "step": 189 }, { "epoch": 1.71, - "learning_rate": 8.588588588588589e-06, - "loss": 1.5505, + "learning_rate": 0.00016576576576576578, + "loss": 0.9829, "step": 190 }, { "epoch": 1.72, - "learning_rate": 8.52852852852853e-06, - "loss": 1.3282, + "learning_rate": 0.00016558558558558558, + "loss": 0.7705, "step": 191 }, { "epoch": 1.73, - "learning_rate": 8.46846846846847e-06, - "loss": 1.4492, + "learning_rate": 0.0001654054054054054, + "loss": 0.8442, "step": 192 }, { "epoch": 1.74, - "learning_rate": 8.408408408408409e-06, - "loss": 1.4575, + "learning_rate": 0.00016522522522522523, + "loss": 0.8804, "step": 193 }, { "epoch": 1.75, - "learning_rate": 8.348348348348348e-06, - "loss": 1.4301, + "learning_rate": 0.00016504504504504506, + "loss": 0.7614, "step": 194 }, { "epoch": 1.76, - "learning_rate": 8.288288288288289e-06, - "loss": 1.4052, + "learning_rate": 0.00016486486486486486, + "loss": 0.8259, "step": 195 }, { "epoch": 1.77, - "learning_rate": 8.228228228228229e-06, - "loss": 1.7595, + "learning_rate": 0.0001646846846846847, + "loss": 1.1247, "step": 196 }, { "epoch": 1.77, - "learning_rate": 8.16816816816817e-06, - "loss": 1.5578, + "learning_rate": 0.00016450450450450452, + "loss": 0.8855, "step": 197 }, { "epoch": 1.78, - "learning_rate": 8.108108108108109e-06, - "loss": 1.4915, + "learning_rate": 0.00016432432432432435, + "loss": 0.9202, "step": 198 }, { "epoch": 1.79, - "learning_rate": 8.048048048048048e-06, - "loss": 1.3835, + "learning_rate": 0.00016414414414414415, + "loss": 0.9203, "step": 199 }, { "epoch": 1.8, - "learning_rate": 7.987987987987988e-06, - "loss": 1.3505, + "learning_rate": 0.00016396396396396395, + "loss": 0.8537, "step": 200 }, { "epoch": 1.81, - "learning_rate": 7.927927927927929e-06, - "loss": 1.5297, + "learning_rate": 0.00016378378378378378, + "loss": 0.8746, "step": 201 }, { "epoch": 1.82, - "learning_rate": 7.867867867867868e-06, - "loss": 1.6206, + "learning_rate": 0.0001636036036036036, + "loss": 1.0242, "step": 202 }, { "epoch": 1.83, - "learning_rate": 7.807807807807808e-06, - "loss": 1.4387, + "learning_rate": 0.00016342342342342344, + "loss": 0.8806, "step": 203 }, { "epoch": 1.84, - "learning_rate": 7.747747747747749e-06, - "loss": 1.5027, + "learning_rate": 0.00016324324324324324, + "loss": 0.8827, "step": 204 }, { "epoch": 1.85, - "learning_rate": 7.687687687687688e-06, - "loss": 1.468, + "learning_rate": 0.00016306306306306307, + "loss": 0.9695, "step": 205 }, { "epoch": 1.86, - "learning_rate": 7.6276276276276285e-06, - "loss": 1.4163, + "learning_rate": 0.0001628828828828829, + "loss": 0.9495, "step": 206 }, { "epoch": 1.86, - "learning_rate": 7.567567567567569e-06, - "loss": 1.225, + "learning_rate": 0.00016270270270270272, + "loss": 0.7122, "step": 207 }, { "epoch": 1.87, - "learning_rate": 7.507507507507507e-06, - "loss": 1.5404, + "learning_rate": 0.00016252252252252252, + "loss": 1.0111, "step": 208 }, { "epoch": 1.88, - "learning_rate": 7.447447447447448e-06, - "loss": 1.7203, + "learning_rate": 0.00016234234234234235, + "loss": 1.1255, "step": 209 }, { "epoch": 1.89, - "learning_rate": 7.387387387387388e-06, - "loss": 1.5402, + "learning_rate": 0.00016216216216216218, + "loss": 0.8789, "step": 210 }, { "epoch": 1.9, - "learning_rate": 7.327327327327328e-06, - "loss": 1.4962, + "learning_rate": 0.000161981981981982, + "loss": 0.9309, "step": 211 }, { "epoch": 1.91, - "learning_rate": 7.267267267267268e-06, - "loss": 1.4352, + "learning_rate": 0.0001618018018018018, + "loss": 0.9133, "step": 212 }, { "epoch": 1.92, - "learning_rate": 7.207207207207208e-06, - "loss": 1.4584, + "learning_rate": 0.0001616216216216216, + "loss": 0.9779, "step": 213 }, { "epoch": 1.93, - "learning_rate": 7.147147147147148e-06, - "loss": 1.3992, + "learning_rate": 0.00016144144144144144, + "loss": 0.8261, "step": 214 }, { "epoch": 1.94, - "learning_rate": 7.087087087087087e-06, - "loss": 1.4541, + "learning_rate": 0.00016126126126126127, + "loss": 0.8621, "step": 215 }, { "epoch": 1.95, - "learning_rate": 7.027027027027028e-06, - "loss": 1.3759, + "learning_rate": 0.0001610810810810811, + "loss": 0.7617, "step": 216 }, { "epoch": 1.95, - "learning_rate": 6.966966966966967e-06, - "loss": 1.4622, + "learning_rate": 0.0001609009009009009, + "loss": 0.9391, "step": 217 }, { "epoch": 1.96, - "learning_rate": 6.906906906906907e-06, - "loss": 1.4799, + "learning_rate": 0.00016072072072072073, + "loss": 0.8437, "step": 218 }, { "epoch": 1.97, - "learning_rate": 6.846846846846848e-06, - "loss": 1.4265, + "learning_rate": 0.00016054054054054056, + "loss": 0.9364, "step": 219 }, { "epoch": 1.98, - "learning_rate": 6.786786786786788e-06, - "loss": 1.3938, + "learning_rate": 0.00016036036036036038, + "loss": 0.8519, "step": 220 }, { "epoch": 1.99, - "learning_rate": 6.726726726726728e-06, - "loss": 1.5023, + "learning_rate": 0.00016018018018018018, + "loss": 0.8703, "step": 221 }, { "epoch": 2.0, - "learning_rate": 6.666666666666667e-06, - "loss": 1.2256, + "learning_rate": 0.00016, + "loss": 0.5854, "step": 222 }, { "epoch": 2.01, - "learning_rate": 6.606606606606607e-06, - "loss": 1.3189, + "learning_rate": 0.00015981981981981984, + "loss": 0.6418, "step": 223 }, { "epoch": 2.02, - "learning_rate": 6.546546546546547e-06, - "loss": 1.4738, + "learning_rate": 0.00015963963963963964, + "loss": 0.6642, "step": 224 }, { "epoch": 2.03, - "learning_rate": 6.486486486486487e-06, - "loss": 1.4976, + "learning_rate": 0.00015945945945945947, + "loss": 0.7597, "step": 225 }, { "epoch": 2.04, - "learning_rate": 6.426426426426427e-06, - "loss": 1.4145, + "learning_rate": 0.00015927927927927927, + "loss": 0.7035, "step": 226 }, { "epoch": 2.05, - "learning_rate": 6.366366366366366e-06, - "loss": 1.3549, + "learning_rate": 0.0001590990990990991, + "loss": 0.6098, "step": 227 }, { "epoch": 2.05, - "learning_rate": 6.3063063063063065e-06, - "loss": 1.4151, + "learning_rate": 0.00015891891891891893, + "loss": 0.6118, "step": 228 }, { "epoch": 2.06, - "learning_rate": 6.246246246246247e-06, - "loss": 1.6266, + "learning_rate": 0.00015873873873873876, + "loss": 0.6998, "step": 229 }, { "epoch": 2.07, - "learning_rate": 6.186186186186187e-06, - "loss": 1.4899, + "learning_rate": 0.00015855855855855856, + "loss": 0.6526, "step": 230 }, { "epoch": 2.08, - "learning_rate": 6.126126126126126e-06, - "loss": 1.2746, + "learning_rate": 0.0001583783783783784, + "loss": 0.6019, "step": 231 }, { "epoch": 2.09, - "learning_rate": 6.066066066066067e-06, - "loss": 1.3838, + "learning_rate": 0.00015819819819819822, + "loss": 0.5906, "step": 232 }, { "epoch": 2.1, - "learning_rate": 6.006006006006007e-06, - "loss": 1.3595, + "learning_rate": 0.00015801801801801804, + "loss": 0.6253, "step": 233 }, { "epoch": 2.11, - "learning_rate": 5.945945945945947e-06, - "loss": 1.3993, + "learning_rate": 0.00015783783783783785, + "loss": 0.6721, "step": 234 }, { "epoch": 2.12, - "learning_rate": 5.885885885885886e-06, - "loss": 1.2979, + "learning_rate": 0.00015765765765765767, + "loss": 0.5985, "step": 235 }, { "epoch": 2.13, - "learning_rate": 5.825825825825826e-06, - "loss": 1.424, + "learning_rate": 0.00015747747747747747, + "loss": 0.6828, "step": 236 }, { "epoch": 2.14, - "learning_rate": 5.765765765765766e-06, - "loss": 1.3078, + "learning_rate": 0.0001572972972972973, + "loss": 0.5669, "step": 237 }, { "epoch": 2.14, - "learning_rate": 5.7057057057057065e-06, - "loss": 1.4321, + "learning_rate": 0.00015711711711711713, + "loss": 0.7135, "step": 238 }, { "epoch": 2.15, - "learning_rate": 5.645645645645647e-06, - "loss": 1.3732, + "learning_rate": 0.00015693693693693693, + "loss": 0.5734, "step": 239 }, { "epoch": 2.16, - "learning_rate": 5.585585585585585e-06, - "loss": 1.5107, + "learning_rate": 0.00015675675675675676, + "loss": 0.7206, "step": 240 }, { "epoch": 2.17, - "learning_rate": 5.5255255255255255e-06, - "loss": 1.5524, + "learning_rate": 0.0001565765765765766, + "loss": 0.7708, "step": 241 }, { "epoch": 2.18, - "learning_rate": 5.465465465465466e-06, - "loss": 1.4516, + "learning_rate": 0.00015639639639639642, + "loss": 0.744, "step": 242 }, { "epoch": 2.19, - "learning_rate": 5.405405405405406e-06, - "loss": 1.2053, + "learning_rate": 0.00015621621621621622, + "loss": 0.5623, "step": 243 }, { "epoch": 2.2, - "learning_rate": 5.345345345345346e-06, - "loss": 1.459, + "learning_rate": 0.00015603603603603605, + "loss": 0.712, "step": 244 }, { "epoch": 2.21, - "learning_rate": 5.285285285285286e-06, - "loss": 1.3932, + "learning_rate": 0.00015585585585585588, + "loss": 0.5958, "step": 245 }, { "epoch": 2.22, - "learning_rate": 5.225225225225226e-06, - "loss": 1.3219, + "learning_rate": 0.0001556756756756757, + "loss": 0.7317, "step": 246 }, { "epoch": 2.23, - "learning_rate": 5.165165165165165e-06, - "loss": 1.3934, + "learning_rate": 0.0001554954954954955, + "loss": 0.7904, "step": 247 }, { "epoch": 2.23, - "learning_rate": 5.105105105105106e-06, - "loss": 1.3035, + "learning_rate": 0.0001553153153153153, + "loss": 0.6661, "step": 248 }, { "epoch": 2.24, - "learning_rate": 5.045045045045045e-06, - "loss": 1.3401, + "learning_rate": 0.00015513513513513514, + "loss": 0.6191, "step": 249 }, { "epoch": 2.25, - "learning_rate": 4.984984984984985e-06, - "loss": 1.3076, + "learning_rate": 0.00015495495495495496, + "loss": 0.6944, "step": 250 }, { "epoch": 2.26, - "learning_rate": 4.9249249249249255e-06, - "loss": 1.3364, + "learning_rate": 0.00015477477477477477, + "loss": 0.7057, "step": 251 }, { "epoch": 2.27, - "learning_rate": 4.864864864864866e-06, - "loss": 1.2897, + "learning_rate": 0.0001545945945945946, + "loss": 0.6131, "step": 252 }, { "epoch": 2.28, - "learning_rate": 4.804804804804805e-06, - "loss": 1.2572, + "learning_rate": 0.00015441441441441442, + "loss": 0.5626, "step": 253 }, { "epoch": 2.29, - "learning_rate": 4.7447447447447454e-06, - "loss": 1.3396, + "learning_rate": 0.00015423423423423425, + "loss": 0.6566, "step": 254 }, { "epoch": 2.3, - "learning_rate": 4.684684684684685e-06, - "loss": 1.4288, + "learning_rate": 0.00015405405405405405, + "loss": 0.7677, "step": 255 }, { "epoch": 2.31, - "learning_rate": 4.624624624624625e-06, - "loss": 1.3392, + "learning_rate": 0.00015387387387387388, + "loss": 0.7221, "step": 256 }, { "epoch": 2.32, - "learning_rate": 4.5645645645645645e-06, - "loss": 1.4231, + "learning_rate": 0.0001536936936936937, + "loss": 0.7011, "step": 257 }, { "epoch": 2.32, - "learning_rate": 4.504504504504505e-06, - "loss": 1.3351, + "learning_rate": 0.00015351351351351354, + "loss": 0.651, "step": 258 }, { "epoch": 2.33, - "learning_rate": 4.444444444444444e-06, - "loss": 1.4397, + "learning_rate": 0.00015333333333333334, + "loss": 0.7339, "step": 259 }, { "epoch": 2.34, - "learning_rate": 4.384384384384384e-06, - "loss": 1.4746, + "learning_rate": 0.00015315315315315314, + "loss": 0.7311, "step": 260 }, { "epoch": 2.35, - "learning_rate": 4.324324324324325e-06, - "loss": 1.4103, + "learning_rate": 0.00015297297297297297, + "loss": 0.799, "step": 261 }, { "epoch": 2.36, - "learning_rate": 4.264264264264265e-06, - "loss": 1.325, + "learning_rate": 0.0001527927927927928, + "loss": 0.5861, "step": 262 }, { "epoch": 2.37, - "learning_rate": 4.204204204204204e-06, - "loss": 1.4474, + "learning_rate": 0.00015261261261261262, + "loss": 0.7177, "step": 263 }, { "epoch": 2.38, - "learning_rate": 4.1441441441441446e-06, - "loss": 1.243, + "learning_rate": 0.00015243243243243243, + "loss": 0.6037, "step": 264 }, { "epoch": 2.39, - "learning_rate": 4.084084084084085e-06, - "loss": 1.3342, + "learning_rate": 0.00015225225225225225, + "loss": 0.595, "step": 265 }, { "epoch": 2.4, - "learning_rate": 4.024024024024024e-06, - "loss": 1.2141, + "learning_rate": 0.00015207207207207208, + "loss": 0.4945, "step": 266 }, { "epoch": 2.41, - "learning_rate": 3.9639639639639645e-06, - "loss": 1.389, + "learning_rate": 0.0001518918918918919, + "loss": 0.7155, "step": 267 }, { "epoch": 2.41, - "learning_rate": 3.903903903903904e-06, - "loss": 1.4364, + "learning_rate": 0.0001517117117117117, + "loss": 0.633, "step": 268 }, { "epoch": 2.42, - "learning_rate": 3.843843843843844e-06, - "loss": 1.4544, + "learning_rate": 0.00015153153153153154, + "loss": 0.698, "step": 269 }, { "epoch": 2.43, - "learning_rate": 3.7837837837837844e-06, - "loss": 1.4788, + "learning_rate": 0.00015135135135135137, + "loss": 0.7286, "step": 270 }, { "epoch": 2.44, - "learning_rate": 3.723723723723724e-06, - "loss": 1.3652, + "learning_rate": 0.0001511711711711712, + "loss": 0.8749, "step": 271 }, { "epoch": 2.45, - "learning_rate": 3.663663663663664e-06, - "loss": 1.2965, + "learning_rate": 0.000150990990990991, + "loss": 0.638, "step": 272 }, { "epoch": 2.46, - "learning_rate": 3.603603603603604e-06, - "loss": 1.4887, + "learning_rate": 0.0001508108108108108, + "loss": 0.7524, "step": 273 }, { "epoch": 2.47, - "learning_rate": 3.5435435435435437e-06, - "loss": 1.2402, + "learning_rate": 0.00015063063063063063, + "loss": 1.5031, "step": 274 }, { "epoch": 2.48, - "learning_rate": 3.4834834834834835e-06, - "loss": 1.265, + "learning_rate": 0.00015045045045045046, + "loss": 1.8588, "step": 275 }, { "epoch": 2.49, - "learning_rate": 3.423423423423424e-06, - "loss": 1.3136, + "learning_rate": 0.00015027027027027028, + "loss": 0.8453, "step": 276 }, { "epoch": 2.5, - "learning_rate": 3.363363363363364e-06, - "loss": 1.5174, + "learning_rate": 0.00015009009009009009, + "loss": 1.0263, "step": 277 }, { "epoch": 2.5, - "learning_rate": 3.3033033033033035e-06, - "loss": 1.3669, + "learning_rate": 0.00014990990990990991, + "loss": 0.7366, "step": 278 }, { "epoch": 2.51, - "learning_rate": 3.2432432432432437e-06, - "loss": 1.2474, + "learning_rate": 0.00014972972972972974, + "loss": 0.7533, "step": 279 }, { "epoch": 2.52, - "learning_rate": 3.183183183183183e-06, - "loss": 1.3761, + "learning_rate": 0.00014954954954954957, + "loss": 0.7375, "step": 280 }, { "epoch": 2.53, - "learning_rate": 3.1231231231231234e-06, - "loss": 1.3245, + "learning_rate": 0.00014936936936936937, + "loss": 0.8677, "step": 281 }, { "epoch": 2.54, - "learning_rate": 3.063063063063063e-06, - "loss": 1.3889, + "learning_rate": 0.0001491891891891892, + "loss": 0.689, "step": 282 }, { "epoch": 2.55, - "learning_rate": 3.0030030030030034e-06, - "loss": 1.2964, + "learning_rate": 0.00014900900900900903, + "loss": 0.6659, "step": 283 }, { "epoch": 2.56, - "learning_rate": 2.942942942942943e-06, - "loss": 1.4232, + "learning_rate": 0.00014882882882882883, + "loss": 1.3045, "step": 284 }, { "epoch": 2.57, - "learning_rate": 2.882882882882883e-06, - "loss": 1.5877, + "learning_rate": 0.00014864864864864866, + "loss": 0.8731, "step": 285 }, { "epoch": 2.58, - "learning_rate": 2.8228228228228234e-06, - "loss": 1.7226, + "learning_rate": 0.00014846846846846846, + "loss": 0.9237, "step": 286 }, { "epoch": 2.59, - "learning_rate": 2.7627627627627628e-06, - "loss": 1.3672, + "learning_rate": 0.0001482882882882883, + "loss": 0.7298, "step": 287 }, { "epoch": 2.59, - "learning_rate": 2.702702702702703e-06, - "loss": 1.3058, + "learning_rate": 0.00014810810810810812, + "loss": 0.5712, "step": 288 }, { "epoch": 2.6, - "learning_rate": 2.642642642642643e-06, - "loss": 1.5852, + "learning_rate": 0.00014792792792792795, + "loss": 0.8097, "step": 289 }, { "epoch": 2.61, - "learning_rate": 2.5825825825825827e-06, - "loss": 1.1589, + "learning_rate": 0.00014774774774774775, + "loss": 0.5713, "step": 290 }, { "epoch": 2.62, - "learning_rate": 2.5225225225225225e-06, - "loss": 1.4041, + "learning_rate": 0.00014756756756756758, + "loss": 0.7031, "step": 291 }, { "epoch": 2.63, - "learning_rate": 2.4624624624624628e-06, - "loss": 1.3072, + "learning_rate": 0.0001473873873873874, + "loss": 0.5795, "step": 292 }, { "epoch": 2.64, - "learning_rate": 2.4024024024024026e-06, - "loss": 1.3421, + "learning_rate": 0.00014720720720720723, + "loss": 0.6107, "step": 293 }, { "epoch": 2.65, - "learning_rate": 2.3423423423423424e-06, - "loss": 1.5669, + "learning_rate": 0.00014702702702702703, + "loss": 0.7417, "step": 294 }, { "epoch": 2.66, - "learning_rate": 2.2822822822822822e-06, - "loss": 1.4368, + "learning_rate": 0.00014684684684684686, + "loss": 0.7342, "step": 295 }, { "epoch": 2.67, - "learning_rate": 2.222222222222222e-06, - "loss": 1.1976, + "learning_rate": 0.00014666666666666666, + "loss": 0.6121, "step": 296 }, { "epoch": 2.68, - "learning_rate": 2.1621621621621623e-06, - "loss": 1.4212, + "learning_rate": 0.0001464864864864865, + "loss": 0.801, "step": 297 }, { "epoch": 2.68, - "learning_rate": 2.102102102102102e-06, - "loss": 1.3067, + "learning_rate": 0.00014630630630630632, + "loss": 0.5626, "step": 298 }, { "epoch": 2.69, - "learning_rate": 2.0420420420420424e-06, - "loss": 1.3672, + "learning_rate": 0.00014612612612612612, + "loss": 0.6167, "step": 299 }, { "epoch": 2.7, - "learning_rate": 1.9819819819819822e-06, - "loss": 1.3335, + "learning_rate": 0.00014594594594594595, + "loss": 0.606, "step": 300 }, { "epoch": 2.71, - "learning_rate": 1.921921921921922e-06, - "loss": 1.467, + "learning_rate": 0.00014576576576576578, + "loss": 0.7919, "step": 301 }, { "epoch": 2.72, - "learning_rate": 1.861861861861862e-06, - "loss": 1.535, + "learning_rate": 0.0001455855855855856, + "loss": 0.9143, "step": 302 }, { "epoch": 2.73, - "learning_rate": 1.801801801801802e-06, - "loss": 1.3377, + "learning_rate": 0.0001454054054054054, + "loss": 0.5416, "step": 303 }, { "epoch": 2.74, - "learning_rate": 1.7417417417417418e-06, - "loss": 1.5501, + "learning_rate": 0.00014522522522522524, + "loss": 0.9303, "step": 304 }, { "epoch": 2.75, - "learning_rate": 1.681681681681682e-06, - "loss": 1.5146, + "learning_rate": 0.00014504504504504506, + "loss": 0.7065, "step": 305 }, { "epoch": 2.76, - "learning_rate": 1.6216216216216219e-06, - "loss": 1.3974, + "learning_rate": 0.0001448648648648649, + "loss": 0.7287, "step": 306 }, { "epoch": 2.77, - "learning_rate": 1.5615615615615617e-06, - "loss": 1.4609, + "learning_rate": 0.0001446846846846847, + "loss": 0.6792, "step": 307 }, { "epoch": 2.77, - "learning_rate": 1.5015015015015017e-06, - "loss": 1.4186, + "learning_rate": 0.0001445045045045045, + "loss": 0.762, "step": 308 }, { "epoch": 2.78, - "learning_rate": 1.4414414414414416e-06, - "loss": 1.4664, + "learning_rate": 0.00014432432432432432, + "loss": 0.5965, "step": 309 }, { "epoch": 2.79, - "learning_rate": 1.3813813813813814e-06, - "loss": 1.3609, + "learning_rate": 0.00014414414414414415, + "loss": 0.6565, "step": 310 }, { "epoch": 2.8, - "learning_rate": 1.3213213213213214e-06, - "loss": 1.4355, + "learning_rate": 0.00014396396396396395, + "loss": 0.734, "step": 311 }, { "epoch": 2.81, - "learning_rate": 1.2612612612612613e-06, - "loss": 1.3592, + "learning_rate": 0.00014378378378378378, + "loss": 0.6371, "step": 312 }, { "epoch": 2.82, - "learning_rate": 1.2012012012012013e-06, - "loss": 1.3873, + "learning_rate": 0.0001436036036036036, + "loss": 0.67, "step": 313 }, { "epoch": 2.83, - "learning_rate": 1.1411411411411411e-06, - "loss": 1.3438, + "learning_rate": 0.00014342342342342344, + "loss": 0.6313, "step": 314 }, { "epoch": 2.84, - "learning_rate": 1.0810810810810812e-06, - "loss": 1.2623, + "learning_rate": 0.00014324324324324324, + "loss": 0.6081, "step": 315 }, { "epoch": 2.85, - "learning_rate": 1.0210210210210212e-06, - "loss": 1.4811, + "learning_rate": 0.00014306306306306307, + "loss": 0.7904, "step": 316 }, { "epoch": 2.86, - "learning_rate": 9.60960960960961e-07, - "loss": 1.2461, + "learning_rate": 0.0001428828828828829, + "loss": 0.6455, "step": 317 }, { "epoch": 2.86, - "learning_rate": 9.00900900900901e-07, - "loss": 1.2419, + "learning_rate": 0.00014270270270270272, + "loss": 0.6323, "step": 318 }, { "epoch": 2.87, - "learning_rate": 8.40840840840841e-07, - "loss": 1.5783, + "learning_rate": 0.00014252252252252253, + "loss": 0.779, "step": 319 }, { "epoch": 2.88, - "learning_rate": 7.807807807807808e-07, - "loss": 1.2684, + "learning_rate": 0.00014234234234234233, + "loss": 0.5011, "step": 320 }, { "epoch": 2.89, - "learning_rate": 7.207207207207208e-07, - "loss": 1.3713, + "learning_rate": 0.00014216216216216216, + "loss": 0.6007, "step": 321 }, { "epoch": 2.9, - "learning_rate": 6.606606606606607e-07, - "loss": 1.5109, + "learning_rate": 0.00014198198198198198, + "loss": 0.7067, "step": 322 }, { "epoch": 2.91, - "learning_rate": 6.006006006006006e-07, - "loss": 1.3602, + "learning_rate": 0.0001418018018018018, + "loss": 0.6962, "step": 323 }, { "epoch": 2.92, - "learning_rate": 5.405405405405406e-07, - "loss": 1.5054, + "learning_rate": 0.00014162162162162161, + "loss": 0.7823, "step": 324 }, { "epoch": 2.93, - "learning_rate": 4.804804804804805e-07, - "loss": 1.5096, + "learning_rate": 0.00014144144144144144, + "loss": 0.7142, "step": 325 }, { "epoch": 2.94, - "learning_rate": 4.204204204204205e-07, - "loss": 1.3165, + "learning_rate": 0.00014126126126126127, + "loss": 0.6356, "step": 326 }, { "epoch": 2.95, - "learning_rate": 3.603603603603604e-07, - "loss": 1.4575, + "learning_rate": 0.0001410810810810811, + "loss": 0.7062, "step": 327 }, { "epoch": 2.95, - "learning_rate": 3.003003003003003e-07, - "loss": 1.6184, + "learning_rate": 0.0001409009009009009, + "loss": 0.7607, "step": 328 }, { "epoch": 2.96, - "learning_rate": 2.4024024024024026e-07, - "loss": 1.2022, + "learning_rate": 0.00014072072072072073, + "loss": 0.6222, "step": 329 }, { "epoch": 2.97, - "learning_rate": 1.801801801801802e-07, - "loss": 1.4417, + "learning_rate": 0.00014054054054054056, + "loss": 0.7505, "step": 330 }, { "epoch": 2.98, - "learning_rate": 1.2012012012012013e-07, - "loss": 1.2554, + "learning_rate": 0.00014036036036036039, + "loss": 0.6673, "step": 331 }, { "epoch": 2.99, - "learning_rate": 6.006006006006006e-08, - "loss": 1.28, + "learning_rate": 0.0001401801801801802, + "loss": 0.6934, "step": 332 }, { "epoch": 3.0, - "learning_rate": 0.0, - "loss": 1.2557, + "learning_rate": 0.00014, + "loss": 0.5489, "step": 333 }, { - "epoch": 3.0, - "step": 333, - "total_flos": 1915204220928000.0, - "train_loss": 1.781525854233865, - "train_runtime": 119.0355, - "train_samples_per_second": 88.889, - "train_steps_per_second": 2.797 + "epoch": 3.01, + "learning_rate": 0.00013981981981981982, + "loss": 0.4571, + "step": 334 + }, + { + "epoch": 3.02, + "learning_rate": 0.00013963963963963964, + "loss": 0.4375, + "step": 335 + }, + { + "epoch": 3.03, + "learning_rate": 0.00013945945945945947, + "loss": 0.4438, + "step": 336 + }, + { + "epoch": 3.04, + "learning_rate": 0.00013927927927927927, + "loss": 0.4085, + "step": 337 + }, + { + "epoch": 3.05, + "learning_rate": 0.0001390990990990991, + "loss": 0.5438, + "step": 338 + }, + { + "epoch": 3.05, + "learning_rate": 0.00013891891891891893, + "loss": 0.5038, + "step": 339 + }, + { + "epoch": 3.06, + "learning_rate": 0.00013873873873873876, + "loss": 0.479, + "step": 340 + }, + { + "epoch": 3.07, + "learning_rate": 0.00013855855855855856, + "loss": 0.405, + "step": 341 + }, + { + "epoch": 3.08, + "learning_rate": 0.0001383783783783784, + "loss": 0.4181, + "step": 342 + }, + { + "epoch": 3.09, + "learning_rate": 0.00013819819819819822, + "loss": 0.4513, + "step": 343 + }, + { + "epoch": 3.1, + "learning_rate": 0.00013801801801801802, + "loss": 0.4685, + "step": 344 + }, + { + "epoch": 3.11, + "learning_rate": 0.00013783783783783785, + "loss": 0.5291, + "step": 345 + }, + { + "epoch": 3.12, + "learning_rate": 0.00013765765765765765, + "loss": 0.5497, + "step": 346 + }, + { + "epoch": 3.13, + "learning_rate": 0.00013747747747747748, + "loss": 0.4366, + "step": 347 + }, + { + "epoch": 3.14, + "learning_rate": 0.0001372972972972973, + "loss": 0.5815, + "step": 348 + }, + { + "epoch": 3.14, + "learning_rate": 0.00013711711711711713, + "loss": 0.5056, + "step": 349 + }, + { + "epoch": 3.15, + "learning_rate": 0.00013693693693693693, + "loss": 0.4975, + "step": 350 + }, + { + "epoch": 3.16, + "learning_rate": 0.00013675675675675676, + "loss": 0.4601, + "step": 351 + }, + { + "epoch": 3.17, + "learning_rate": 0.0001365765765765766, + "loss": 0.4328, + "step": 352 + }, + { + "epoch": 3.18, + "learning_rate": 0.00013639639639639642, + "loss": 0.5189, + "step": 353 + }, + { + "epoch": 3.19, + "learning_rate": 0.00013621621621621622, + "loss": 0.4054, + "step": 354 + }, + { + "epoch": 3.2, + "learning_rate": 0.00013603603603603605, + "loss": 0.4604, + "step": 355 + }, + { + "epoch": 3.21, + "learning_rate": 0.00013585585585585585, + "loss": 0.5539, + "step": 356 + }, + { + "epoch": 3.22, + "learning_rate": 0.00013567567567567568, + "loss": 0.3773, + "step": 357 + }, + { + "epoch": 3.23, + "learning_rate": 0.0001354954954954955, + "loss": 0.4631, + "step": 358 + }, + { + "epoch": 3.23, + "learning_rate": 0.0001353153153153153, + "loss": 0.463, + "step": 359 + }, + { + "epoch": 3.24, + "learning_rate": 0.00013513513513513514, + "loss": 0.4423, + "step": 360 + }, + { + "epoch": 3.25, + "learning_rate": 0.00013495495495495497, + "loss": 0.5197, + "step": 361 + }, + { + "epoch": 3.26, + "learning_rate": 0.0001347747747747748, + "loss": 0.6306, + "step": 362 + }, + { + "epoch": 3.27, + "learning_rate": 0.0001345945945945946, + "loss": 0.431, + "step": 363 + }, + { + "epoch": 3.28, + "learning_rate": 0.00013441441441441442, + "loss": 0.4192, + "step": 364 + }, + { + "epoch": 3.29, + "learning_rate": 0.00013423423423423425, + "loss": 0.5175, + "step": 365 + }, + { + "epoch": 3.3, + "learning_rate": 0.00013405405405405408, + "loss": 0.4623, + "step": 366 + }, + { + "epoch": 3.31, + "learning_rate": 0.00013387387387387388, + "loss": 0.4332, + "step": 367 + }, + { + "epoch": 3.32, + "learning_rate": 0.00013369369369369368, + "loss": 0.4969, + "step": 368 + }, + { + "epoch": 3.32, + "learning_rate": 0.0001335135135135135, + "loss": 0.5908, + "step": 369 + }, + { + "epoch": 3.33, + "learning_rate": 0.00013333333333333334, + "loss": 0.4597, + "step": 370 + }, + { + "epoch": 3.34, + "learning_rate": 0.00013315315315315314, + "loss": 0.4516, + "step": 371 + }, + { + "epoch": 3.35, + "learning_rate": 0.00013297297297297297, + "loss": 0.498, + "step": 372 + }, + { + "epoch": 3.36, + "learning_rate": 0.0001327927927927928, + "loss": 0.4829, + "step": 373 + }, + { + "epoch": 3.37, + "learning_rate": 0.00013261261261261263, + "loss": 0.4679, + "step": 374 + }, + { + "epoch": 3.38, + "learning_rate": 0.00013243243243243243, + "loss": 0.5148, + "step": 375 + }, + { + "epoch": 3.39, + "learning_rate": 0.00013225225225225226, + "loss": 0.4495, + "step": 376 + }, + { + "epoch": 3.4, + "learning_rate": 0.00013207207207207208, + "loss": 0.4408, + "step": 377 + }, + { + "epoch": 3.41, + "learning_rate": 0.0001318918918918919, + "loss": 0.4732, + "step": 378 + }, + { + "epoch": 3.41, + "learning_rate": 0.00013171171171171171, + "loss": 0.4048, + "step": 379 + }, + { + "epoch": 3.42, + "learning_rate": 0.00013153153153153154, + "loss": 0.4607, + "step": 380 + }, + { + "epoch": 3.43, + "learning_rate": 0.00013135135135135134, + "loss": 0.457, + "step": 381 + }, + { + "epoch": 3.44, + "learning_rate": 0.00013117117117117117, + "loss": 0.4446, + "step": 382 + }, + { + "epoch": 3.45, + "learning_rate": 0.000130990990990991, + "loss": 0.5565, + "step": 383 + }, + { + "epoch": 3.46, + "learning_rate": 0.0001308108108108108, + "loss": 0.452, + "step": 384 + }, + { + "epoch": 3.47, + "learning_rate": 0.00013063063063063063, + "loss": 0.6298, + "step": 385 + }, + { + "epoch": 3.48, + "learning_rate": 0.00013045045045045046, + "loss": 0.5399, + "step": 386 + }, + { + "epoch": 3.49, + "learning_rate": 0.0001302702702702703, + "loss": 0.4797, + "step": 387 + }, + { + "epoch": 3.5, + "learning_rate": 0.0001300900900900901, + "loss": 0.5867, + "step": 388 + }, + { + "epoch": 3.5, + "learning_rate": 0.00012990990990990992, + "loss": 0.4448, + "step": 389 + }, + { + "epoch": 3.51, + "learning_rate": 0.00012972972972972974, + "loss": 0.5132, + "step": 390 + }, + { + "epoch": 3.52, + "learning_rate": 0.00012954954954954957, + "loss": 0.4374, + "step": 391 + }, + { + "epoch": 3.53, + "learning_rate": 0.00012936936936936937, + "loss": 0.5119, + "step": 392 + }, + { + "epoch": 3.54, + "learning_rate": 0.00012918918918918918, + "loss": 0.5887, + "step": 393 + }, + { + "epoch": 3.55, + "learning_rate": 0.000129009009009009, + "loss": 0.477, + "step": 394 + }, + { + "epoch": 3.56, + "learning_rate": 0.00012882882882882883, + "loss": 0.5135, + "step": 395 + }, + { + "epoch": 3.57, + "learning_rate": 0.00012864864864864866, + "loss": 0.4852, + "step": 396 + }, + { + "epoch": 3.58, + "learning_rate": 0.00012846846846846846, + "loss": 0.5327, + "step": 397 + }, + { + "epoch": 3.59, + "learning_rate": 0.0001282882882882883, + "loss": 0.4726, + "step": 398 + }, + { + "epoch": 3.59, + "learning_rate": 0.00012810810810810812, + "loss": 0.4805, + "step": 399 + }, + { + "epoch": 3.6, + "learning_rate": 0.00012792792792792795, + "loss": 0.4577, + "step": 400 + }, + { + "epoch": 3.61, + "learning_rate": 0.00012774774774774775, + "loss": 0.4823, + "step": 401 + }, + { + "epoch": 3.62, + "learning_rate": 0.00012756756756756758, + "loss": 0.4779, + "step": 402 + }, + { + "epoch": 3.63, + "learning_rate": 0.0001273873873873874, + "loss": 0.586, + "step": 403 + }, + { + "epoch": 3.64, + "learning_rate": 0.00012720720720720723, + "loss": 0.4533, + "step": 404 + }, + { + "epoch": 3.65, + "learning_rate": 0.00012702702702702703, + "loss": 0.5355, + "step": 405 + }, + { + "epoch": 3.66, + "learning_rate": 0.00012684684684684684, + "loss": 0.581, + "step": 406 + }, + { + "epoch": 3.67, + "learning_rate": 0.00012666666666666666, + "loss": 0.4933, + "step": 407 + }, + { + "epoch": 3.68, + "learning_rate": 0.0001264864864864865, + "loss": 0.5883, + "step": 408 + }, + { + "epoch": 3.68, + "learning_rate": 0.00012630630630630632, + "loss": 0.5552, + "step": 409 + }, + { + "epoch": 3.69, + "learning_rate": 0.00012612612612612612, + "loss": 0.5098, + "step": 410 + }, + { + "epoch": 3.7, + "learning_rate": 0.00012594594594594595, + "loss": 0.4585, + "step": 411 + }, + { + "epoch": 3.71, + "learning_rate": 0.00012576576576576578, + "loss": 0.5726, + "step": 412 + }, + { + "epoch": 3.72, + "learning_rate": 0.0001255855855855856, + "loss": 0.5016, + "step": 413 + }, + { + "epoch": 3.73, + "learning_rate": 0.0001254054054054054, + "loss": 0.6272, + "step": 414 + }, + { + "epoch": 3.74, + "learning_rate": 0.00012522522522522524, + "loss": 0.5357, + "step": 415 + }, + { + "epoch": 3.75, + "learning_rate": 0.00012504504504504507, + "loss": 0.4562, + "step": 416 + }, + { + "epoch": 3.76, + "learning_rate": 0.00012486486486486487, + "loss": 0.5279, + "step": 417 + }, + { + "epoch": 3.77, + "learning_rate": 0.0001246846846846847, + "loss": 0.5381, + "step": 418 + }, + { + "epoch": 3.77, + "learning_rate": 0.0001245045045045045, + "loss": 0.5124, + "step": 419 + }, + { + "epoch": 3.78, + "learning_rate": 0.00012432432432432433, + "loss": 0.5242, + "step": 420 + }, + { + "epoch": 3.79, + "learning_rate": 0.00012414414414414415, + "loss": 0.4864, + "step": 421 + }, + { + "epoch": 3.8, + "learning_rate": 0.00012396396396396398, + "loss": 0.4408, + "step": 422 + }, + { + "epoch": 3.81, + "learning_rate": 0.00012378378378378378, + "loss": 0.4686, + "step": 423 + }, + { + "epoch": 3.82, + "learning_rate": 0.0001236036036036036, + "loss": 0.4583, + "step": 424 + }, + { + "epoch": 3.83, + "learning_rate": 0.00012342342342342344, + "loss": 0.5698, + "step": 425 + }, + { + "epoch": 3.84, + "learning_rate": 0.00012324324324324327, + "loss": 0.5193, + "step": 426 + }, + { + "epoch": 3.85, + "learning_rate": 0.00012306306306306307, + "loss": 0.6061, + "step": 427 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001228828828828829, + "loss": 0.4556, + "step": 428 + }, + { + "epoch": 3.86, + "learning_rate": 0.0001227027027027027, + "loss": 0.4494, + "step": 429 + }, + { + "epoch": 3.87, + "learning_rate": 0.00012252252252252253, + "loss": 0.4351, + "step": 430 + }, + { + "epoch": 3.88, + "learning_rate": 0.00012234234234234233, + "loss": 0.4312, + "step": 431 + }, + { + "epoch": 3.89, + "learning_rate": 0.00012216216216216216, + "loss": 0.5729, + "step": 432 + }, + { + "epoch": 3.9, + "learning_rate": 0.00012198198198198199, + "loss": 0.4375, + "step": 433 + }, + { + "epoch": 3.91, + "learning_rate": 0.0001218018018018018, + "loss": 0.5225, + "step": 434 + }, + { + "epoch": 3.92, + "learning_rate": 0.00012162162162162163, + "loss": 0.4699, + "step": 435 + }, + { + "epoch": 3.93, + "learning_rate": 0.00012144144144144144, + "loss": 0.4862, + "step": 436 + }, + { + "epoch": 3.94, + "learning_rate": 0.00012126126126126127, + "loss": 0.5477, + "step": 437 + }, + { + "epoch": 3.95, + "learning_rate": 0.00012108108108108109, + "loss": 0.5063, + "step": 438 + }, + { + "epoch": 3.95, + "learning_rate": 0.00012090090090090092, + "loss": 0.4708, + "step": 439 + }, + { + "epoch": 3.96, + "learning_rate": 0.00012072072072072073, + "loss": 0.3925, + "step": 440 + }, + { + "epoch": 3.97, + "learning_rate": 0.00012054054054054053, + "loss": 0.5277, + "step": 441 + }, + { + "epoch": 3.98, + "learning_rate": 0.00012036036036036036, + "loss": 0.5806, + "step": 442 + }, + { + "epoch": 3.99, + "learning_rate": 0.00012018018018018017, + "loss": 0.4542, + "step": 443 + }, + { + "epoch": 4.0, + "learning_rate": 0.00012, + "loss": 0.6045, + "step": 444 + }, + { + "epoch": 4.01, + "learning_rate": 0.00011981981981981982, + "loss": 0.3265, + "step": 445 + }, + { + "epoch": 4.02, + "learning_rate": 0.00011963963963963965, + "loss": 0.3141, + "step": 446 + }, + { + "epoch": 4.03, + "learning_rate": 0.00011945945945945946, + "loss": 0.312, + "step": 447 + }, + { + "epoch": 4.04, + "learning_rate": 0.00011927927927927929, + "loss": 0.3917, + "step": 448 + }, + { + "epoch": 4.05, + "learning_rate": 0.0001190990990990991, + "loss": 0.4704, + "step": 449 + }, + { + "epoch": 4.05, + "learning_rate": 0.00011891891891891893, + "loss": 0.3541, + "step": 450 + }, + { + "epoch": 4.06, + "learning_rate": 0.00011873873873873875, + "loss": 0.401, + "step": 451 + }, + { + "epoch": 4.07, + "learning_rate": 0.00011855855855855858, + "loss": 0.3266, + "step": 452 + }, + { + "epoch": 4.08, + "learning_rate": 0.00011837837837837838, + "loss": 0.3155, + "step": 453 + }, + { + "epoch": 4.09, + "learning_rate": 0.00011819819819819819, + "loss": 0.3531, + "step": 454 + }, + { + "epoch": 4.1, + "learning_rate": 0.00011801801801801802, + "loss": 0.3775, + "step": 455 + }, + { + "epoch": 4.11, + "learning_rate": 0.00011783783783783784, + "loss": 0.3703, + "step": 456 + }, + { + "epoch": 4.12, + "learning_rate": 0.00011765765765765766, + "loss": 0.3814, + "step": 457 + }, + { + "epoch": 4.13, + "learning_rate": 0.00011747747747747748, + "loss": 0.3786, + "step": 458 + }, + { + "epoch": 4.14, + "learning_rate": 0.00011729729729729731, + "loss": 0.3343, + "step": 459 + }, + { + "epoch": 4.14, + "learning_rate": 0.00011711711711711712, + "loss": 0.3516, + "step": 460 + }, + { + "epoch": 4.15, + "learning_rate": 0.00011693693693693695, + "loss": 0.3729, + "step": 461 + }, + { + "epoch": 4.16, + "learning_rate": 0.00011675675675675676, + "loss": 0.3886, + "step": 462 + }, + { + "epoch": 4.17, + "learning_rate": 0.0001165765765765766, + "loss": 0.364, + "step": 463 + }, + { + "epoch": 4.18, + "learning_rate": 0.00011639639639639641, + "loss": 0.3486, + "step": 464 + }, + { + "epoch": 4.19, + "learning_rate": 0.00011621621621621621, + "loss": 0.3411, + "step": 465 + }, + { + "epoch": 4.2, + "learning_rate": 0.00011603603603603604, + "loss": 0.4359, + "step": 466 + }, + { + "epoch": 4.21, + "learning_rate": 0.00011585585585585585, + "loss": 0.336, + "step": 467 + }, + { + "epoch": 4.22, + "learning_rate": 0.00011567567567567568, + "loss": 0.4453, + "step": 468 + }, + { + "epoch": 4.23, + "learning_rate": 0.0001154954954954955, + "loss": 0.3626, + "step": 469 + }, + { + "epoch": 4.23, + "learning_rate": 0.00011531531531531532, + "loss": 0.3608, + "step": 470 + }, + { + "epoch": 4.24, + "learning_rate": 0.00011513513513513514, + "loss": 0.3803, + "step": 471 + }, + { + "epoch": 4.25, + "learning_rate": 0.00011495495495495497, + "loss": 0.3862, + "step": 472 + }, + { + "epoch": 4.26, + "learning_rate": 0.00011477477477477478, + "loss": 0.3639, + "step": 473 + }, + { + "epoch": 4.27, + "learning_rate": 0.00011459459459459461, + "loss": 0.3345, + "step": 474 + }, + { + "epoch": 4.28, + "learning_rate": 0.00011441441441441443, + "loss": 0.3343, + "step": 475 + }, + { + "epoch": 4.29, + "learning_rate": 0.00011423423423423425, + "loss": 0.3943, + "step": 476 + }, + { + "epoch": 4.3, + "learning_rate": 0.00011405405405405406, + "loss": 0.3802, + "step": 477 + }, + { + "epoch": 4.31, + "learning_rate": 0.00011387387387387387, + "loss": 0.3523, + "step": 478 + }, + { + "epoch": 4.32, + "learning_rate": 0.0001136936936936937, + "loss": 0.4398, + "step": 479 + }, + { + "epoch": 4.32, + "learning_rate": 0.00011351351351351351, + "loss": 0.3851, + "step": 480 + }, + { + "epoch": 4.33, + "learning_rate": 0.00011333333333333334, + "loss": 0.3755, + "step": 481 + }, + { + "epoch": 4.34, + "learning_rate": 0.00011315315315315316, + "loss": 0.3402, + "step": 482 + }, + { + "epoch": 4.35, + "learning_rate": 0.00011297297297297298, + "loss": 0.3227, + "step": 483 + }, + { + "epoch": 4.36, + "learning_rate": 0.0001127927927927928, + "loss": 0.3282, + "step": 484 + }, + { + "epoch": 4.37, + "learning_rate": 0.00011261261261261263, + "loss": 0.3621, + "step": 485 + }, + { + "epoch": 4.38, + "learning_rate": 0.00011243243243243244, + "loss": 0.3507, + "step": 486 + }, + { + "epoch": 4.39, + "learning_rate": 0.00011225225225225227, + "loss": 0.412, + "step": 487 + }, + { + "epoch": 4.4, + "learning_rate": 0.00011207207207207209, + "loss": 0.5337, + "step": 488 + }, + { + "epoch": 4.41, + "learning_rate": 0.00011189189189189189, + "loss": 0.4157, + "step": 489 + }, + { + "epoch": 4.41, + "learning_rate": 0.0001117117117117117, + "loss": 0.411, + "step": 490 + }, + { + "epoch": 4.42, + "learning_rate": 0.00011153153153153153, + "loss": 0.4512, + "step": 491 + }, + { + "epoch": 4.43, + "learning_rate": 0.00011135135135135135, + "loss": 0.3734, + "step": 492 + }, + { + "epoch": 4.44, + "learning_rate": 0.00011117117117117117, + "loss": 0.3837, + "step": 493 + }, + { + "epoch": 4.45, + "learning_rate": 0.00011099099099099099, + "loss": 0.4167, + "step": 494 + }, + { + "epoch": 4.46, + "learning_rate": 0.00011081081081081082, + "loss": 0.3796, + "step": 495 + }, + { + "epoch": 4.47, + "learning_rate": 0.00011063063063063063, + "loss": 0.3891, + "step": 496 + }, + { + "epoch": 4.48, + "learning_rate": 0.00011045045045045046, + "loss": 0.3086, + "step": 497 + }, + { + "epoch": 4.49, + "learning_rate": 0.00011027027027027029, + "loss": 0.4173, + "step": 498 + }, + { + "epoch": 4.5, + "learning_rate": 0.0001100900900900901, + "loss": 0.3275, + "step": 499 + }, + { + "epoch": 4.5, + "learning_rate": 0.00010990990990990993, + "loss": 0.357, + "step": 500 + }, + { + "epoch": 4.51, + "learning_rate": 0.00010972972972972972, + "loss": 0.4611, + "step": 501 + }, + { + "epoch": 4.52, + "learning_rate": 0.00010954954954954955, + "loss": 0.3544, + "step": 502 + }, + { + "epoch": 4.53, + "learning_rate": 0.00010936936936936936, + "loss": 0.3564, + "step": 503 + }, + { + "epoch": 4.54, + "learning_rate": 0.00010918918918918919, + "loss": 0.3306, + "step": 504 + }, + { + "epoch": 4.55, + "learning_rate": 0.000109009009009009, + "loss": 0.4079, + "step": 505 + }, + { + "epoch": 4.56, + "learning_rate": 0.00010882882882882883, + "loss": 0.3906, + "step": 506 + }, + { + "epoch": 4.57, + "learning_rate": 0.00010864864864864865, + "loss": 0.3863, + "step": 507 + }, + { + "epoch": 4.58, + "learning_rate": 0.00010846846846846848, + "loss": 0.3378, + "step": 508 + }, + { + "epoch": 4.59, + "learning_rate": 0.00010828828828828829, + "loss": 0.3556, + "step": 509 + }, + { + "epoch": 4.59, + "learning_rate": 0.00010810810810810812, + "loss": 0.2989, + "step": 510 + }, + { + "epoch": 4.6, + "learning_rate": 0.00010792792792792794, + "loss": 0.4635, + "step": 511 + }, + { + "epoch": 4.61, + "learning_rate": 0.00010774774774774776, + "loss": 0.3909, + "step": 512 + }, + { + "epoch": 4.62, + "learning_rate": 0.00010756756756756757, + "loss": 0.3124, + "step": 513 + }, + { + "epoch": 4.63, + "learning_rate": 0.00010738738738738738, + "loss": 0.4026, + "step": 514 + }, + { + "epoch": 4.64, + "learning_rate": 0.00010720720720720721, + "loss": 0.3955, + "step": 515 + }, + { + "epoch": 4.65, + "learning_rate": 0.00010702702702702702, + "loss": 0.3776, + "step": 516 + }, + { + "epoch": 4.66, + "learning_rate": 0.00010684684684684685, + "loss": 0.3324, + "step": 517 + }, + { + "epoch": 4.67, + "learning_rate": 0.00010666666666666667, + "loss": 0.3567, + "step": 518 + }, + { + "epoch": 4.68, + "learning_rate": 0.0001064864864864865, + "loss": 0.3758, + "step": 519 + }, + { + "epoch": 4.68, + "learning_rate": 0.00010630630630630631, + "loss": 0.3658, + "step": 520 + }, + { + "epoch": 4.69, + "learning_rate": 0.00010612612612612614, + "loss": 0.4016, + "step": 521 + }, + { + "epoch": 4.7, + "learning_rate": 0.00010594594594594595, + "loss": 0.3351, + "step": 522 + }, + { + "epoch": 4.71, + "learning_rate": 0.00010576576576576578, + "loss": 0.4547, + "step": 523 + }, + { + "epoch": 4.72, + "learning_rate": 0.0001055855855855856, + "loss": 0.4222, + "step": 524 + }, + { + "epoch": 4.73, + "learning_rate": 0.0001054054054054054, + "loss": 0.3836, + "step": 525 + }, + { + "epoch": 4.74, + "learning_rate": 0.00010522522522522523, + "loss": 0.3061, + "step": 526 + }, + { + "epoch": 4.75, + "learning_rate": 0.00010504504504504504, + "loss": 0.3326, + "step": 527 + }, + { + "epoch": 4.76, + "learning_rate": 0.00010486486486486487, + "loss": 0.3756, + "step": 528 + }, + { + "epoch": 4.77, + "learning_rate": 0.00010468468468468468, + "loss": 0.3527, + "step": 529 + }, + { + "epoch": 4.77, + "learning_rate": 0.00010450450450450451, + "loss": 0.3688, + "step": 530 + }, + { + "epoch": 4.78, + "learning_rate": 0.00010432432432432433, + "loss": 0.3322, + "step": 531 + }, + { + "epoch": 4.79, + "learning_rate": 0.00010414414414414416, + "loss": 0.4191, + "step": 532 + }, + { + "epoch": 4.8, + "learning_rate": 0.00010396396396396397, + "loss": 0.3586, + "step": 533 + }, + { + "epoch": 4.81, + "learning_rate": 0.0001037837837837838, + "loss": 0.4528, + "step": 534 + }, + { + "epoch": 4.82, + "learning_rate": 0.00010360360360360361, + "loss": 0.2971, + "step": 535 + }, + { + "epoch": 4.83, + "learning_rate": 0.00010342342342342344, + "loss": 0.3338, + "step": 536 + }, + { + "epoch": 4.84, + "learning_rate": 0.00010324324324324324, + "loss": 0.3805, + "step": 537 + }, + { + "epoch": 4.85, + "learning_rate": 0.00010306306306306306, + "loss": 0.3958, + "step": 538 + }, + { + "epoch": 4.86, + "learning_rate": 0.00010288288288288289, + "loss": 0.3995, + "step": 539 + }, + { + "epoch": 4.86, + "learning_rate": 0.0001027027027027027, + "loss": 0.3661, + "step": 540 + }, + { + "epoch": 4.87, + "learning_rate": 0.00010252252252252253, + "loss": 0.2927, + "step": 541 + }, + { + "epoch": 4.88, + "learning_rate": 0.00010234234234234234, + "loss": 0.4004, + "step": 542 + }, + { + "epoch": 4.89, + "learning_rate": 0.00010216216216216217, + "loss": 0.3619, + "step": 543 + }, + { + "epoch": 4.9, + "learning_rate": 0.00010198198198198199, + "loss": 0.3478, + "step": 544 + }, + { + "epoch": 4.91, + "learning_rate": 0.00010180180180180182, + "loss": 0.417, + "step": 545 + }, + { + "epoch": 4.92, + "learning_rate": 0.00010162162162162163, + "loss": 0.4245, + "step": 546 + }, + { + "epoch": 4.93, + "learning_rate": 0.00010144144144144146, + "loss": 0.319, + "step": 547 + }, + { + "epoch": 4.94, + "learning_rate": 0.00010126126126126127, + "loss": 0.281, + "step": 548 + }, + { + "epoch": 4.95, + "learning_rate": 0.00010108108108108108, + "loss": 0.3438, + "step": 549 + }, + { + "epoch": 4.95, + "learning_rate": 0.00010090090090090089, + "loss": 0.3441, + "step": 550 + }, + { + "epoch": 4.96, + "learning_rate": 0.00010072072072072072, + "loss": 0.3684, + "step": 551 + }, + { + "epoch": 4.97, + "learning_rate": 0.00010054054054054053, + "loss": 0.3679, + "step": 552 + }, + { + "epoch": 4.98, + "learning_rate": 0.00010036036036036036, + "loss": 0.3563, + "step": 553 + }, + { + "epoch": 4.99, + "learning_rate": 0.00010018018018018018, + "loss": 0.4197, + "step": 554 + }, + { + "epoch": 5.0, + "learning_rate": 0.0001, + "loss": 0.4142, + "step": 555 + }, + { + "epoch": 5.01, + "learning_rate": 9.981981981981983e-05, + "loss": 0.2757, + "step": 556 + }, + { + "epoch": 5.02, + "learning_rate": 9.963963963963965e-05, + "loss": 0.3093, + "step": 557 + }, + { + "epoch": 5.03, + "learning_rate": 9.945945945945948e-05, + "loss": 0.2858, + "step": 558 + }, + { + "epoch": 5.04, + "learning_rate": 9.927927927927928e-05, + "loss": 0.2819, + "step": 559 + }, + { + "epoch": 5.05, + "learning_rate": 9.90990990990991e-05, + "loss": 0.2873, + "step": 560 + }, + { + "epoch": 5.05, + "learning_rate": 9.891891891891892e-05, + "loss": 0.2807, + "step": 561 + }, + { + "epoch": 5.06, + "learning_rate": 9.873873873873875e-05, + "loss": 0.3117, + "step": 562 + }, + { + "epoch": 5.07, + "learning_rate": 9.855855855855856e-05, + "loss": 0.2886, + "step": 563 + }, + { + "epoch": 5.08, + "learning_rate": 9.837837837837839e-05, + "loss": 0.2797, + "step": 564 + }, + { + "epoch": 5.09, + "learning_rate": 9.81981981981982e-05, + "loss": 0.3165, + "step": 565 + }, + { + "epoch": 5.1, + "learning_rate": 9.801801801801802e-05, + "loss": 0.3353, + "step": 566 + }, + { + "epoch": 5.11, + "learning_rate": 9.783783783783784e-05, + "loss": 0.2902, + "step": 567 + }, + { + "epoch": 5.12, + "learning_rate": 9.765765765765767e-05, + "loss": 0.2807, + "step": 568 + }, + { + "epoch": 5.13, + "learning_rate": 9.747747747747748e-05, + "loss": 0.3003, + "step": 569 + }, + { + "epoch": 5.14, + "learning_rate": 9.729729729729731e-05, + "loss": 0.2983, + "step": 570 + }, + { + "epoch": 5.14, + "learning_rate": 9.711711711711712e-05, + "loss": 0.2716, + "step": 571 + }, + { + "epoch": 5.15, + "learning_rate": 9.693693693693694e-05, + "loss": 0.3266, + "step": 572 + }, + { + "epoch": 5.16, + "learning_rate": 9.675675675675677e-05, + "loss": 0.2585, + "step": 573 + }, + { + "epoch": 5.17, + "learning_rate": 9.657657657657658e-05, + "loss": 0.2978, + "step": 574 + }, + { + "epoch": 5.18, + "learning_rate": 9.639639639639641e-05, + "loss": 0.3214, + "step": 575 + }, + { + "epoch": 5.19, + "learning_rate": 9.621621621621622e-05, + "loss": 0.2546, + "step": 576 + }, + { + "epoch": 5.2, + "learning_rate": 9.603603603603604e-05, + "loss": 0.2801, + "step": 577 + }, + { + "epoch": 5.21, + "learning_rate": 9.585585585585585e-05, + "loss": 0.2491, + "step": 578 + }, + { + "epoch": 5.22, + "learning_rate": 9.567567567567568e-05, + "loss": 0.2667, + "step": 579 + }, + { + "epoch": 5.23, + "learning_rate": 9.54954954954955e-05, + "loss": 0.2588, + "step": 580 + }, + { + "epoch": 5.23, + "learning_rate": 9.531531531531533e-05, + "loss": 0.2887, + "step": 581 + }, + { + "epoch": 5.24, + "learning_rate": 9.513513513513514e-05, + "loss": 0.2701, + "step": 582 + }, + { + "epoch": 5.25, + "learning_rate": 9.495495495495496e-05, + "loss": 0.2838, + "step": 583 + }, + { + "epoch": 5.26, + "learning_rate": 9.477477477477478e-05, + "loss": 0.3002, + "step": 584 + }, + { + "epoch": 5.27, + "learning_rate": 9.45945945945946e-05, + "loss": 0.2738, + "step": 585 + }, + { + "epoch": 5.28, + "learning_rate": 9.441441441441443e-05, + "loss": 0.2904, + "step": 586 + }, + { + "epoch": 5.29, + "learning_rate": 9.423423423423424e-05, + "loss": 0.2691, + "step": 587 + }, + { + "epoch": 5.3, + "learning_rate": 9.405405405405407e-05, + "loss": 0.3163, + "step": 588 + }, + { + "epoch": 5.31, + "learning_rate": 9.387387387387387e-05, + "loss": 0.2756, + "step": 589 + }, + { + "epoch": 5.32, + "learning_rate": 9.36936936936937e-05, + "loss": 0.2878, + "step": 590 + }, + { + "epoch": 5.32, + "learning_rate": 9.351351351351351e-05, + "loss": 0.2815, + "step": 591 + }, + { + "epoch": 5.33, + "learning_rate": 9.333333333333334e-05, + "loss": 0.3392, + "step": 592 + }, + { + "epoch": 5.34, + "learning_rate": 9.315315315315316e-05, + "loss": 0.3289, + "step": 593 + }, + { + "epoch": 5.35, + "learning_rate": 9.297297297297299e-05, + "loss": 0.3027, + "step": 594 + }, + { + "epoch": 5.36, + "learning_rate": 9.279279279279279e-05, + "loss": 0.3511, + "step": 595 + }, + { + "epoch": 5.37, + "learning_rate": 9.261261261261262e-05, + "loss": 0.2502, + "step": 596 + }, + { + "epoch": 5.38, + "learning_rate": 9.243243243243243e-05, + "loss": 0.281, + "step": 597 + }, + { + "epoch": 5.39, + "learning_rate": 9.225225225225226e-05, + "loss": 0.3211, + "step": 598 + }, + { + "epoch": 5.4, + "learning_rate": 9.207207207207207e-05, + "loss": 0.2933, + "step": 599 + }, + { + "epoch": 5.41, + "learning_rate": 9.18918918918919e-05, + "loss": 0.3106, + "step": 600 + }, + { + "epoch": 5.41, + "learning_rate": 9.171171171171172e-05, + "loss": 0.2699, + "step": 601 + }, + { + "epoch": 5.42, + "learning_rate": 9.153153153153153e-05, + "loss": 0.2849, + "step": 602 + }, + { + "epoch": 5.43, + "learning_rate": 9.135135135135136e-05, + "loss": 0.2974, + "step": 603 + }, + { + "epoch": 5.44, + "learning_rate": 9.117117117117118e-05, + "loss": 0.2942, + "step": 604 + }, + { + "epoch": 5.45, + "learning_rate": 9.0990990990991e-05, + "loss": 0.2788, + "step": 605 + }, + { + "epoch": 5.46, + "learning_rate": 9.081081081081082e-05, + "loss": 0.2658, + "step": 606 + }, + { + "epoch": 5.47, + "learning_rate": 9.063063063063063e-05, + "loss": 0.3066, + "step": 607 + }, + { + "epoch": 5.48, + "learning_rate": 9.045045045045045e-05, + "loss": 0.3215, + "step": 608 + }, + { + "epoch": 5.49, + "learning_rate": 9.027027027027028e-05, + "loss": 0.2737, + "step": 609 + }, + { + "epoch": 5.5, + "learning_rate": 9.009009009009009e-05, + "loss": 0.279, + "step": 610 + }, + { + "epoch": 5.5, + "learning_rate": 8.990990990990992e-05, + "loss": 0.2721, + "step": 611 + }, + { + "epoch": 5.51, + "learning_rate": 8.972972972972973e-05, + "loss": 0.2429, + "step": 612 + }, + { + "epoch": 5.52, + "learning_rate": 8.954954954954955e-05, + "loss": 0.2799, + "step": 613 + }, + { + "epoch": 5.53, + "learning_rate": 8.936936936936938e-05, + "loss": 0.2756, + "step": 614 + }, + { + "epoch": 5.54, + "learning_rate": 8.918918918918919e-05, + "loss": 0.2578, + "step": 615 + }, + { + "epoch": 5.55, + "learning_rate": 8.900900900900902e-05, + "loss": 0.3057, + "step": 616 + }, + { + "epoch": 5.56, + "learning_rate": 8.882882882882884e-05, + "loss": 0.262, + "step": 617 + }, + { + "epoch": 5.57, + "learning_rate": 8.864864864864866e-05, + "loss": 0.3209, + "step": 618 + }, + { + "epoch": 5.58, + "learning_rate": 8.846846846846847e-05, + "loss": 0.2809, + "step": 619 + }, + { + "epoch": 5.59, + "learning_rate": 8.82882882882883e-05, + "loss": 0.299, + "step": 620 + }, + { + "epoch": 5.59, + "learning_rate": 8.810810810810811e-05, + "loss": 0.3359, + "step": 621 + }, + { + "epoch": 5.6, + "learning_rate": 8.792792792792794e-05, + "loss": 0.3272, + "step": 622 + }, + { + "epoch": 5.61, + "learning_rate": 8.774774774774775e-05, + "loss": 0.3184, + "step": 623 + }, + { + "epoch": 5.62, + "learning_rate": 8.756756756756758e-05, + "loss": 0.3008, + "step": 624 + }, + { + "epoch": 5.63, + "learning_rate": 8.738738738738738e-05, + "loss": 0.2611, + "step": 625 + }, + { + "epoch": 5.64, + "learning_rate": 8.720720720720721e-05, + "loss": 0.3845, + "step": 626 + }, + { + "epoch": 5.65, + "learning_rate": 8.702702702702702e-05, + "loss": 0.2434, + "step": 627 + }, + { + "epoch": 5.66, + "learning_rate": 8.684684684684685e-05, + "loss": 0.3178, + "step": 628 + }, + { + "epoch": 5.67, + "learning_rate": 8.666666666666667e-05, + "loss": 0.2643, + "step": 629 + }, + { + "epoch": 5.68, + "learning_rate": 8.64864864864865e-05, + "loss": 0.3346, + "step": 630 + }, + { + "epoch": 5.68, + "learning_rate": 8.630630630630631e-05, + "loss": 0.3385, + "step": 631 + }, + { + "epoch": 5.69, + "learning_rate": 8.612612612612613e-05, + "loss": 0.2635, + "step": 632 + }, + { + "epoch": 5.7, + "learning_rate": 8.594594594594595e-05, + "loss": 0.2559, + "step": 633 + }, + { + "epoch": 5.71, + "learning_rate": 8.576576576576577e-05, + "loss": 0.2956, + "step": 634 + }, + { + "epoch": 5.72, + "learning_rate": 8.55855855855856e-05, + "loss": 0.3279, + "step": 635 + }, + { + "epoch": 5.73, + "learning_rate": 8.540540540540541e-05, + "loss": 0.2796, + "step": 636 + }, + { + "epoch": 5.74, + "learning_rate": 8.522522522522523e-05, + "loss": 0.32, + "step": 637 + }, + { + "epoch": 5.75, + "learning_rate": 8.504504504504504e-05, + "loss": 0.2425, + "step": 638 + }, + { + "epoch": 5.76, + "learning_rate": 8.486486486486487e-05, + "loss": 0.3221, + "step": 639 + }, + { + "epoch": 5.77, + "learning_rate": 8.468468468468469e-05, + "loss": 0.3018, + "step": 640 + }, + { + "epoch": 5.77, + "learning_rate": 8.450450450450451e-05, + "loss": 0.2249, + "step": 641 + }, + { + "epoch": 5.78, + "learning_rate": 8.432432432432433e-05, + "loss": 0.2763, + "step": 642 + }, + { + "epoch": 5.79, + "learning_rate": 8.414414414414414e-05, + "loss": 0.2784, + "step": 643 + }, + { + "epoch": 5.8, + "learning_rate": 8.396396396396397e-05, + "loss": 0.3659, + "step": 644 + }, + { + "epoch": 5.81, + "learning_rate": 8.378378378378379e-05, + "loss": 0.2453, + "step": 645 + }, + { + "epoch": 5.82, + "learning_rate": 8.360360360360362e-05, + "loss": 0.3197, + "step": 646 + }, + { + "epoch": 5.83, + "learning_rate": 8.342342342342343e-05, + "loss": 0.3055, + "step": 647 + }, + { + "epoch": 5.84, + "learning_rate": 8.324324324324326e-05, + "loss": 0.2642, + "step": 648 + }, + { + "epoch": 5.85, + "learning_rate": 8.306306306306306e-05, + "loss": 0.2442, + "step": 649 + }, + { + "epoch": 5.86, + "learning_rate": 8.288288288288289e-05, + "loss": 0.274, + "step": 650 + }, + { + "epoch": 5.86, + "learning_rate": 8.27027027027027e-05, + "loss": 0.2701, + "step": 651 + }, + { + "epoch": 5.87, + "learning_rate": 8.252252252252253e-05, + "loss": 0.2802, + "step": 652 + }, + { + "epoch": 5.88, + "learning_rate": 8.234234234234235e-05, + "loss": 0.2709, + "step": 653 + }, + { + "epoch": 5.89, + "learning_rate": 8.216216216216217e-05, + "loss": 0.3404, + "step": 654 + }, + { + "epoch": 5.9, + "learning_rate": 8.198198198198198e-05, + "loss": 0.3093, + "step": 655 + }, + { + "epoch": 5.91, + "learning_rate": 8.18018018018018e-05, + "loss": 0.3159, + "step": 656 + }, + { + "epoch": 5.92, + "learning_rate": 8.162162162162162e-05, + "loss": 0.3564, + "step": 657 + }, + { + "epoch": 5.93, + "learning_rate": 8.144144144144145e-05, + "loss": 0.2453, + "step": 658 + }, + { + "epoch": 5.94, + "learning_rate": 8.126126126126126e-05, + "loss": 0.2488, + "step": 659 + }, + { + "epoch": 5.95, + "learning_rate": 8.108108108108109e-05, + "loss": 0.3104, + "step": 660 + }, + { + "epoch": 5.95, + "learning_rate": 8.09009009009009e-05, + "loss": 0.3627, + "step": 661 + }, + { + "epoch": 5.96, + "learning_rate": 8.072072072072072e-05, + "loss": 0.3423, + "step": 662 + }, + { + "epoch": 5.97, + "learning_rate": 8.054054054054055e-05, + "loss": 0.2948, + "step": 663 + }, + { + "epoch": 5.98, + "learning_rate": 8.036036036036036e-05, + "loss": 0.2747, + "step": 664 + }, + { + "epoch": 5.99, + "learning_rate": 8.018018018018019e-05, + "loss": 0.2977, + "step": 665 + }, + { + "epoch": 6.0, + "learning_rate": 8e-05, + "loss": 0.2801, + "step": 666 + }, + { + "epoch": 6.01, + "learning_rate": 7.981981981981982e-05, + "loss": 0.2422, + "step": 667 + }, + { + "epoch": 6.02, + "learning_rate": 7.963963963963964e-05, + "loss": 0.2353, + "step": 668 + }, + { + "epoch": 6.03, + "learning_rate": 7.945945945945946e-05, + "loss": 0.2646, + "step": 669 + }, + { + "epoch": 6.04, + "learning_rate": 7.927927927927928e-05, + "loss": 0.2443, + "step": 670 + }, + { + "epoch": 6.05, + "learning_rate": 7.909909909909911e-05, + "loss": 0.2291, + "step": 671 + }, + { + "epoch": 6.05, + "learning_rate": 7.891891891891892e-05, + "loss": 0.2014, + "step": 672 + }, + { + "epoch": 6.06, + "learning_rate": 7.873873873873874e-05, + "loss": 0.2264, + "step": 673 + }, + { + "epoch": 6.07, + "learning_rate": 7.855855855855857e-05, + "loss": 0.2373, + "step": 674 + }, + { + "epoch": 6.08, + "learning_rate": 7.837837837837838e-05, + "loss": 0.2223, + "step": 675 + }, + { + "epoch": 6.09, + "learning_rate": 7.819819819819821e-05, + "loss": 0.2465, + "step": 676 + }, + { + "epoch": 6.1, + "learning_rate": 7.801801801801802e-05, + "loss": 0.2812, + "step": 677 + }, + { + "epoch": 6.11, + "learning_rate": 7.783783783783785e-05, + "loss": 0.2418, + "step": 678 + }, + { + "epoch": 6.12, + "learning_rate": 7.765765765765765e-05, + "loss": 0.2626, + "step": 679 + }, + { + "epoch": 6.13, + "learning_rate": 7.747747747747748e-05, + "loss": 0.2225, + "step": 680 + }, + { + "epoch": 6.14, + "learning_rate": 7.72972972972973e-05, + "loss": 0.2215, + "step": 681 + }, + { + "epoch": 6.14, + "learning_rate": 7.711711711711713e-05, + "loss": 0.2634, + "step": 682 + }, + { + "epoch": 6.15, + "learning_rate": 7.693693693693694e-05, + "loss": 0.2405, + "step": 683 + }, + { + "epoch": 6.16, + "learning_rate": 7.675675675675677e-05, + "loss": 0.2635, + "step": 684 + }, + { + "epoch": 6.17, + "learning_rate": 7.657657657657657e-05, + "loss": 0.2163, + "step": 685 + }, + { + "epoch": 6.18, + "learning_rate": 7.63963963963964e-05, + "loss": 0.2326, + "step": 686 + }, + { + "epoch": 6.19, + "learning_rate": 7.621621621621621e-05, + "loss": 0.2794, + "step": 687 + }, + { + "epoch": 6.2, + "learning_rate": 7.603603603603604e-05, + "loss": 0.2338, + "step": 688 + }, + { + "epoch": 6.21, + "learning_rate": 7.585585585585586e-05, + "loss": 0.2364, + "step": 689 + }, + { + "epoch": 6.22, + "learning_rate": 7.567567567567568e-05, + "loss": 0.2184, + "step": 690 + }, + { + "epoch": 6.23, + "learning_rate": 7.54954954954955e-05, + "loss": 0.2297, + "step": 691 + }, + { + "epoch": 6.23, + "learning_rate": 7.531531531531531e-05, + "loss": 0.2275, + "step": 692 + }, + { + "epoch": 6.24, + "learning_rate": 7.513513513513514e-05, + "loss": 0.2431, + "step": 693 + }, + { + "epoch": 6.25, + "learning_rate": 7.495495495495496e-05, + "loss": 0.2014, + "step": 694 + }, + { + "epoch": 6.26, + "learning_rate": 7.477477477477479e-05, + "loss": 0.232, + "step": 695 + }, + { + "epoch": 6.27, + "learning_rate": 7.45945945945946e-05, + "loss": 0.2394, + "step": 696 + }, + { + "epoch": 6.28, + "learning_rate": 7.441441441441442e-05, + "loss": 0.252, + "step": 697 + }, + { + "epoch": 6.29, + "learning_rate": 7.423423423423423e-05, + "loss": 0.2705, + "step": 698 + }, + { + "epoch": 6.3, + "learning_rate": 7.405405405405406e-05, + "loss": 0.243, + "step": 699 + }, + { + "epoch": 6.31, + "learning_rate": 7.387387387387387e-05, + "loss": 0.2024, + "step": 700 + }, + { + "epoch": 6.32, + "learning_rate": 7.36936936936937e-05, + "loss": 0.2171, + "step": 701 + }, + { + "epoch": 6.32, + "learning_rate": 7.351351351351352e-05, + "loss": 0.2162, + "step": 702 + }, + { + "epoch": 6.33, + "learning_rate": 7.333333333333333e-05, + "loss": 0.2465, + "step": 703 + }, + { + "epoch": 6.34, + "learning_rate": 7.315315315315316e-05, + "loss": 0.2256, + "step": 704 + }, + { + "epoch": 6.35, + "learning_rate": 7.297297297297297e-05, + "loss": 0.2539, + "step": 705 + }, + { + "epoch": 6.36, + "learning_rate": 7.27927927927928e-05, + "loss": 0.2265, + "step": 706 + }, + { + "epoch": 6.37, + "learning_rate": 7.261261261261262e-05, + "loss": 0.266, + "step": 707 + }, + { + "epoch": 6.38, + "learning_rate": 7.243243243243245e-05, + "loss": 0.227, + "step": 708 + }, + { + "epoch": 6.39, + "learning_rate": 7.225225225225225e-05, + "loss": 0.2219, + "step": 709 + }, + { + "epoch": 6.4, + "learning_rate": 7.207207207207208e-05, + "loss": 0.2071, + "step": 710 + }, + { + "epoch": 6.41, + "learning_rate": 7.189189189189189e-05, + "loss": 0.247, + "step": 711 + }, + { + "epoch": 6.41, + "learning_rate": 7.171171171171172e-05, + "loss": 0.2306, + "step": 712 + }, + { + "epoch": 6.42, + "learning_rate": 7.153153153153153e-05, + "loss": 0.2531, + "step": 713 + }, + { + "epoch": 6.43, + "learning_rate": 7.135135135135136e-05, + "loss": 0.2534, + "step": 714 + }, + { + "epoch": 6.44, + "learning_rate": 7.117117117117116e-05, + "loss": 0.2102, + "step": 715 + }, + { + "epoch": 6.45, + "learning_rate": 7.099099099099099e-05, + "loss": 0.2476, + "step": 716 + }, + { + "epoch": 6.46, + "learning_rate": 7.081081081081081e-05, + "loss": 0.2229, + "step": 717 + }, + { + "epoch": 6.47, + "learning_rate": 7.063063063063064e-05, + "loss": 0.217, + "step": 718 + }, + { + "epoch": 6.48, + "learning_rate": 7.045045045045045e-05, + "loss": 0.2887, + "step": 719 + }, + { + "epoch": 6.49, + "learning_rate": 7.027027027027028e-05, + "loss": 0.2373, + "step": 720 + }, + { + "epoch": 6.5, + "learning_rate": 7.00900900900901e-05, + "loss": 0.2376, + "step": 721 + }, + { + "epoch": 6.5, + "learning_rate": 6.990990990990991e-05, + "loss": 0.2175, + "step": 722 + }, + { + "epoch": 6.51, + "learning_rate": 6.972972972972974e-05, + "loss": 0.2444, + "step": 723 + }, + { + "epoch": 6.52, + "learning_rate": 6.954954954954955e-05, + "loss": 0.2278, + "step": 724 + }, + { + "epoch": 6.53, + "learning_rate": 6.936936936936938e-05, + "loss": 0.2493, + "step": 725 + }, + { + "epoch": 6.54, + "learning_rate": 6.91891891891892e-05, + "loss": 0.2598, + "step": 726 + }, + { + "epoch": 6.55, + "learning_rate": 6.900900900900901e-05, + "loss": 0.2349, + "step": 727 + }, + { + "epoch": 6.56, + "learning_rate": 6.882882882882882e-05, + "loss": 0.1987, + "step": 728 + }, + { + "epoch": 6.57, + "learning_rate": 6.864864864864865e-05, + "loss": 0.2043, + "step": 729 + }, + { + "epoch": 6.58, + "learning_rate": 6.846846846846847e-05, + "loss": 0.2178, + "step": 730 + }, + { + "epoch": 6.59, + "learning_rate": 6.82882882882883e-05, + "loss": 0.2323, + "step": 731 + }, + { + "epoch": 6.59, + "learning_rate": 6.810810810810811e-05, + "loss": 0.1981, + "step": 732 + }, + { + "epoch": 6.6, + "learning_rate": 6.792792792792793e-05, + "loss": 0.2229, + "step": 733 + }, + { + "epoch": 6.61, + "learning_rate": 6.774774774774775e-05, + "loss": 0.2083, + "step": 734 + }, + { + "epoch": 6.62, + "learning_rate": 6.756756756756757e-05, + "loss": 0.2394, + "step": 735 + }, + { + "epoch": 6.63, + "learning_rate": 6.73873873873874e-05, + "loss": 0.2321, + "step": 736 + }, + { + "epoch": 6.64, + "learning_rate": 6.720720720720721e-05, + "loss": 0.2422, + "step": 737 + }, + { + "epoch": 6.65, + "learning_rate": 6.702702702702704e-05, + "loss": 0.2542, + "step": 738 + }, + { + "epoch": 6.66, + "learning_rate": 6.684684684684684e-05, + "loss": 0.2108, + "step": 739 + }, + { + "epoch": 6.67, + "learning_rate": 6.666666666666667e-05, + "loss": 0.2947, + "step": 740 + }, + { + "epoch": 6.68, + "learning_rate": 6.648648648648648e-05, + "loss": 0.2397, + "step": 741 + }, + { + "epoch": 6.68, + "learning_rate": 6.630630630630631e-05, + "loss": 0.2606, + "step": 742 + }, + { + "epoch": 6.69, + "learning_rate": 6.612612612612613e-05, + "loss": 0.2134, + "step": 743 + }, + { + "epoch": 6.7, + "learning_rate": 6.594594594594596e-05, + "loss": 0.2437, + "step": 744 + }, + { + "epoch": 6.71, + "learning_rate": 6.576576576576577e-05, + "loss": 0.2596, + "step": 745 + }, + { + "epoch": 6.72, + "learning_rate": 6.558558558558559e-05, + "loss": 0.3366, + "step": 746 + }, + { + "epoch": 6.73, + "learning_rate": 6.54054054054054e-05, + "loss": 0.2668, + "step": 747 + }, + { + "epoch": 6.74, + "learning_rate": 6.522522522522523e-05, + "loss": 0.2131, + "step": 748 + }, + { + "epoch": 6.75, + "learning_rate": 6.504504504504504e-05, + "loss": 0.2526, + "step": 749 + }, + { + "epoch": 6.76, + "learning_rate": 6.486486486486487e-05, + "loss": 0.2533, + "step": 750 + }, + { + "epoch": 6.77, + "learning_rate": 6.468468468468469e-05, + "loss": 0.2435, + "step": 751 + }, + { + "epoch": 6.77, + "learning_rate": 6.45045045045045e-05, + "loss": 0.2438, + "step": 752 + }, + { + "epoch": 6.78, + "learning_rate": 6.432432432432433e-05, + "loss": 0.2876, + "step": 753 + }, + { + "epoch": 6.79, + "learning_rate": 6.414414414414415e-05, + "loss": 0.2223, + "step": 754 + }, + { + "epoch": 6.8, + "learning_rate": 6.396396396396397e-05, + "loss": 0.2464, + "step": 755 + }, + { + "epoch": 6.81, + "learning_rate": 6.378378378378379e-05, + "loss": 0.2879, + "step": 756 + }, + { + "epoch": 6.82, + "learning_rate": 6.360360360360362e-05, + "loss": 0.2551, + "step": 757 + }, + { + "epoch": 6.83, + "learning_rate": 6.342342342342342e-05, + "loss": 0.2715, + "step": 758 + }, + { + "epoch": 6.84, + "learning_rate": 6.324324324324325e-05, + "loss": 0.2768, + "step": 759 + }, + { + "epoch": 6.85, + "learning_rate": 6.306306306306306e-05, + "loss": 0.2601, + "step": 760 + }, + { + "epoch": 6.86, + "learning_rate": 6.288288288288289e-05, + "loss": 0.2163, + "step": 761 + }, + { + "epoch": 6.86, + "learning_rate": 6.27027027027027e-05, + "loss": 0.1886, + "step": 762 + }, + { + "epoch": 6.87, + "learning_rate": 6.252252252252253e-05, + "loss": 0.2579, + "step": 763 + }, + { + "epoch": 6.88, + "learning_rate": 6.234234234234235e-05, + "loss": 0.2352, + "step": 764 + }, + { + "epoch": 6.89, + "learning_rate": 6.216216216216216e-05, + "loss": 0.2469, + "step": 765 + }, + { + "epoch": 6.9, + "learning_rate": 6.198198198198199e-05, + "loss": 0.2886, + "step": 766 + }, + { + "epoch": 6.91, + "learning_rate": 6.18018018018018e-05, + "loss": 0.2168, + "step": 767 + }, + { + "epoch": 6.92, + "learning_rate": 6.162162162162163e-05, + "loss": 0.2607, + "step": 768 + }, + { + "epoch": 6.93, + "learning_rate": 6.144144144144145e-05, + "loss": 0.253, + "step": 769 + }, + { + "epoch": 6.94, + "learning_rate": 6.126126126126126e-05, + "loss": 0.2079, + "step": 770 + }, + { + "epoch": 6.95, + "learning_rate": 6.108108108108108e-05, + "loss": 0.2232, + "step": 771 + }, + { + "epoch": 6.95, + "learning_rate": 6.09009009009009e-05, + "loss": 0.2432, + "step": 772 + }, + { + "epoch": 6.96, + "learning_rate": 6.072072072072072e-05, + "loss": 0.2366, + "step": 773 + }, + { + "epoch": 6.97, + "learning_rate": 6.0540540540540543e-05, + "loss": 0.2189, + "step": 774 + }, + { + "epoch": 6.98, + "learning_rate": 6.0360360360360365e-05, + "loss": 0.253, + "step": 775 + }, + { + "epoch": 6.99, + "learning_rate": 6.018018018018018e-05, + "loss": 0.2137, + "step": 776 + }, + { + "epoch": 7.0, + "learning_rate": 6e-05, + "loss": 0.2828, + "step": 777 + }, + { + "epoch": 7.01, + "learning_rate": 5.981981981981982e-05, + "loss": 0.1999, + "step": 778 + }, + { + "epoch": 7.02, + "learning_rate": 5.9639639639639645e-05, + "loss": 0.1985, + "step": 779 + }, + { + "epoch": 7.03, + "learning_rate": 5.9459459459459466e-05, + "loss": 0.183, + "step": 780 + }, + { + "epoch": 7.04, + "learning_rate": 5.927927927927929e-05, + "loss": 0.1728, + "step": 781 + }, + { + "epoch": 7.05, + "learning_rate": 5.9099099099099096e-05, + "loss": 0.184, + "step": 782 + }, + { + "epoch": 7.05, + "learning_rate": 5.891891891891892e-05, + "loss": 0.2041, + "step": 783 + }, + { + "epoch": 7.06, + "learning_rate": 5.873873873873874e-05, + "loss": 0.1711, + "step": 784 + }, + { + "epoch": 7.07, + "learning_rate": 5.855855855855856e-05, + "loss": 0.1968, + "step": 785 + }, + { + "epoch": 7.08, + "learning_rate": 5.837837837837838e-05, + "loss": 0.2036, + "step": 786 + }, + { + "epoch": 7.09, + "learning_rate": 5.8198198198198204e-05, + "loss": 0.2364, + "step": 787 + }, + { + "epoch": 7.1, + "learning_rate": 5.801801801801802e-05, + "loss": 0.2074, + "step": 788 + }, + { + "epoch": 7.11, + "learning_rate": 5.783783783783784e-05, + "loss": 0.2597, + "step": 789 + }, + { + "epoch": 7.12, + "learning_rate": 5.765765765765766e-05, + "loss": 0.1965, + "step": 790 + }, + { + "epoch": 7.13, + "learning_rate": 5.7477477477477484e-05, + "loss": 0.1815, + "step": 791 + }, + { + "epoch": 7.14, + "learning_rate": 5.7297297297297305e-05, + "loss": 0.1948, + "step": 792 + }, + { + "epoch": 7.14, + "learning_rate": 5.711711711711713e-05, + "loss": 0.2361, + "step": 793 + }, + { + "epoch": 7.15, + "learning_rate": 5.6936936936936935e-05, + "loss": 0.2092, + "step": 794 + }, + { + "epoch": 7.16, + "learning_rate": 5.6756756756756757e-05, + "loss": 0.2038, + "step": 795 + }, + { + "epoch": 7.17, + "learning_rate": 5.657657657657658e-05, + "loss": 0.1696, + "step": 796 + }, + { + "epoch": 7.18, + "learning_rate": 5.63963963963964e-05, + "loss": 0.1944, + "step": 797 + }, + { + "epoch": 7.19, + "learning_rate": 5.621621621621622e-05, + "loss": 0.2115, + "step": 798 + }, + { + "epoch": 7.2, + "learning_rate": 5.603603603603604e-05, + "loss": 0.1758, + "step": 799 + }, + { + "epoch": 7.21, + "learning_rate": 5.585585585585585e-05, + "loss": 0.1908, + "step": 800 + }, + { + "epoch": 7.22, + "learning_rate": 5.567567567567567e-05, + "loss": 0.2106, + "step": 801 + }, + { + "epoch": 7.23, + "learning_rate": 5.5495495495495494e-05, + "loss": 0.2048, + "step": 802 + }, + { + "epoch": 7.23, + "learning_rate": 5.5315315315315316e-05, + "loss": 0.192, + "step": 803 + }, + { + "epoch": 7.24, + "learning_rate": 5.5135135135135144e-05, + "loss": 0.1656, + "step": 804 + }, + { + "epoch": 7.25, + "learning_rate": 5.4954954954954966e-05, + "loss": 0.2072, + "step": 805 + }, + { + "epoch": 7.26, + "learning_rate": 5.4774774774774774e-05, + "loss": 0.1902, + "step": 806 + }, + { + "epoch": 7.27, + "learning_rate": 5.4594594594594595e-05, + "loss": 0.2155, + "step": 807 + }, + { + "epoch": 7.28, + "learning_rate": 5.441441441441442e-05, + "loss": 0.1983, + "step": 808 + }, + { + "epoch": 7.29, + "learning_rate": 5.423423423423424e-05, + "loss": 0.19, + "step": 809 + }, + { + "epoch": 7.3, + "learning_rate": 5.405405405405406e-05, + "loss": 0.2128, + "step": 810 + }, + { + "epoch": 7.31, + "learning_rate": 5.387387387387388e-05, + "loss": 0.1976, + "step": 811 + }, + { + "epoch": 7.32, + "learning_rate": 5.369369369369369e-05, + "loss": 0.228, + "step": 812 + }, + { + "epoch": 7.32, + "learning_rate": 5.351351351351351e-05, + "loss": 0.2673, + "step": 813 + }, + { + "epoch": 7.33, + "learning_rate": 5.333333333333333e-05, + "loss": 0.218, + "step": 814 + }, + { + "epoch": 7.34, + "learning_rate": 5.3153153153153155e-05, + "loss": 0.1898, + "step": 815 + }, + { + "epoch": 7.35, + "learning_rate": 5.2972972972972976e-05, + "loss": 0.1921, + "step": 816 + }, + { + "epoch": 7.36, + "learning_rate": 5.27927927927928e-05, + "loss": 0.1932, + "step": 817 + }, + { + "epoch": 7.37, + "learning_rate": 5.261261261261261e-05, + "loss": 0.2193, + "step": 818 + }, + { + "epoch": 7.38, + "learning_rate": 5.2432432432432434e-05, + "loss": 0.1821, + "step": 819 + }, + { + "epoch": 7.39, + "learning_rate": 5.2252252252252256e-05, + "loss": 0.2112, + "step": 820 + }, + { + "epoch": 7.4, + "learning_rate": 5.207207207207208e-05, + "loss": 0.2047, + "step": 821 + }, + { + "epoch": 7.41, + "learning_rate": 5.18918918918919e-05, + "loss": 0.1936, + "step": 822 + }, + { + "epoch": 7.41, + "learning_rate": 5.171171171171172e-05, + "loss": 0.2419, + "step": 823 + }, + { + "epoch": 7.42, + "learning_rate": 5.153153153153153e-05, + "loss": 0.1825, + "step": 824 + }, + { + "epoch": 7.43, + "learning_rate": 5.135135135135135e-05, + "loss": 0.2011, + "step": 825 + }, + { + "epoch": 7.44, + "learning_rate": 5.117117117117117e-05, + "loss": 0.2126, + "step": 826 + }, + { + "epoch": 7.45, + "learning_rate": 5.0990990990990994e-05, + "loss": 0.2394, + "step": 827 + }, + { + "epoch": 7.46, + "learning_rate": 5.0810810810810815e-05, + "loss": 0.2179, + "step": 828 + }, + { + "epoch": 7.47, + "learning_rate": 5.063063063063064e-05, + "loss": 0.232, + "step": 829 + }, + { + "epoch": 7.48, + "learning_rate": 5.0450450450450445e-05, + "loss": 0.2246, + "step": 830 + }, + { + "epoch": 7.49, + "learning_rate": 5.0270270270270267e-05, + "loss": 0.1931, + "step": 831 + }, + { + "epoch": 7.5, + "learning_rate": 5.009009009009009e-05, + "loss": 0.1878, + "step": 832 + }, + { + "epoch": 7.5, + "learning_rate": 4.9909909909909917e-05, + "loss": 0.2133, + "step": 833 + }, + { + "epoch": 7.51, + "learning_rate": 4.972972972972974e-05, + "loss": 0.1934, + "step": 834 + }, + { + "epoch": 7.52, + "learning_rate": 4.954954954954955e-05, + "loss": 0.2257, + "step": 835 + }, + { + "epoch": 7.53, + "learning_rate": 4.9369369369369375e-05, + "loss": 0.1962, + "step": 836 + }, + { + "epoch": 7.54, + "learning_rate": 4.9189189189189196e-05, + "loss": 0.1722, + "step": 837 + }, + { + "epoch": 7.55, + "learning_rate": 4.900900900900901e-05, + "loss": 0.1857, + "step": 838 + }, + { + "epoch": 7.56, + "learning_rate": 4.882882882882883e-05, + "loss": 0.2043, + "step": 839 + }, + { + "epoch": 7.57, + "learning_rate": 4.8648648648648654e-05, + "loss": 0.1977, + "step": 840 + }, + { + "epoch": 7.58, + "learning_rate": 4.846846846846847e-05, + "loss": 0.2503, + "step": 841 + }, + { + "epoch": 7.59, + "learning_rate": 4.828828828828829e-05, + "loss": 0.2246, + "step": 842 + }, + { + "epoch": 7.59, + "learning_rate": 4.810810810810811e-05, + "loss": 0.2141, + "step": 843 + }, + { + "epoch": 7.6, + "learning_rate": 4.792792792792793e-05, + "loss": 0.2114, + "step": 844 + }, + { + "epoch": 7.61, + "learning_rate": 4.774774774774775e-05, + "loss": 0.207, + "step": 845 + }, + { + "epoch": 7.62, + "learning_rate": 4.756756756756757e-05, + "loss": 0.2045, + "step": 846 + }, + { + "epoch": 7.63, + "learning_rate": 4.738738738738739e-05, + "loss": 0.1935, + "step": 847 + }, + { + "epoch": 7.64, + "learning_rate": 4.7207207207207214e-05, + "loss": 0.2299, + "step": 848 + }, + { + "epoch": 7.65, + "learning_rate": 4.7027027027027035e-05, + "loss": 0.2157, + "step": 849 + }, + { + "epoch": 7.66, + "learning_rate": 4.684684684684685e-05, + "loss": 0.2246, + "step": 850 + }, + { + "epoch": 7.67, + "learning_rate": 4.666666666666667e-05, + "loss": 0.1902, + "step": 851 + }, + { + "epoch": 7.68, + "learning_rate": 4.648648648648649e-05, + "loss": 0.2111, + "step": 852 + }, + { + "epoch": 7.68, + "learning_rate": 4.630630630630631e-05, + "loss": 0.198, + "step": 853 + }, + { + "epoch": 7.69, + "learning_rate": 4.612612612612613e-05, + "loss": 0.2126, + "step": 854 + }, + { + "epoch": 7.7, + "learning_rate": 4.594594594594595e-05, + "loss": 0.2093, + "step": 855 + }, + { + "epoch": 7.71, + "learning_rate": 4.5765765765765766e-05, + "loss": 0.2755, + "step": 856 + }, + { + "epoch": 7.72, + "learning_rate": 4.558558558558559e-05, + "loss": 0.2028, + "step": 857 + }, + { + "epoch": 7.73, + "learning_rate": 4.540540540540541e-05, + "loss": 0.2026, + "step": 858 + }, + { + "epoch": 7.74, + "learning_rate": 4.5225225225225224e-05, + "loss": 0.1991, + "step": 859 + }, + { + "epoch": 7.75, + "learning_rate": 4.5045045045045046e-05, + "loss": 0.2005, + "step": 860 + }, + { + "epoch": 7.76, + "learning_rate": 4.486486486486487e-05, + "loss": 0.1966, + "step": 861 + }, + { + "epoch": 7.77, + "learning_rate": 4.468468468468469e-05, + "loss": 0.2325, + "step": 862 + }, + { + "epoch": 7.77, + "learning_rate": 4.450450450450451e-05, + "loss": 0.1986, + "step": 863 + }, + { + "epoch": 7.78, + "learning_rate": 4.432432432432433e-05, + "loss": 0.2288, + "step": 864 + }, + { + "epoch": 7.79, + "learning_rate": 4.414414414414415e-05, + "loss": 0.1883, + "step": 865 + }, + { + "epoch": 7.8, + "learning_rate": 4.396396396396397e-05, + "loss": 0.2734, + "step": 866 + }, + { + "epoch": 7.81, + "learning_rate": 4.378378378378379e-05, + "loss": 0.2244, + "step": 867 + }, + { + "epoch": 7.82, + "learning_rate": 4.3603603603603605e-05, + "loss": 0.2134, + "step": 868 + }, + { + "epoch": 7.83, + "learning_rate": 4.342342342342343e-05, + "loss": 0.2495, + "step": 869 + }, + { + "epoch": 7.84, + "learning_rate": 4.324324324324325e-05, + "loss": 0.2268, + "step": 870 + }, + { + "epoch": 7.85, + "learning_rate": 4.306306306306306e-05, + "loss": 0.2232, + "step": 871 + }, + { + "epoch": 7.86, + "learning_rate": 4.2882882882882885e-05, + "loss": 0.2142, + "step": 872 + }, + { + "epoch": 7.86, + "learning_rate": 4.2702702702702706e-05, + "loss": 0.1927, + "step": 873 + }, + { + "epoch": 7.87, + "learning_rate": 4.252252252252252e-05, + "loss": 0.2356, + "step": 874 + }, + { + "epoch": 7.88, + "learning_rate": 4.234234234234234e-05, + "loss": 0.1987, + "step": 875 + }, + { + "epoch": 7.89, + "learning_rate": 4.2162162162162164e-05, + "loss": 0.203, + "step": 876 + }, + { + "epoch": 7.9, + "learning_rate": 4.1981981981981986e-05, + "loss": 0.2034, + "step": 877 + }, + { + "epoch": 7.91, + "learning_rate": 4.180180180180181e-05, + "loss": 0.221, + "step": 878 + }, + { + "epoch": 7.92, + "learning_rate": 4.162162162162163e-05, + "loss": 0.1952, + "step": 879 + }, + { + "epoch": 7.93, + "learning_rate": 4.1441441441441444e-05, + "loss": 0.1798, + "step": 880 + }, + { + "epoch": 7.94, + "learning_rate": 4.1261261261261266e-05, + "loss": 0.2057, + "step": 881 + }, + { + "epoch": 7.95, + "learning_rate": 4.108108108108109e-05, + "loss": 0.2084, + "step": 882 + }, + { + "epoch": 7.95, + "learning_rate": 4.09009009009009e-05, + "loss": 0.2067, + "step": 883 + }, + { + "epoch": 7.96, + "learning_rate": 4.0720720720720724e-05, + "loss": 0.2017, + "step": 884 + }, + { + "epoch": 7.97, + "learning_rate": 4.0540540540540545e-05, + "loss": 0.1786, + "step": 885 + }, + { + "epoch": 7.98, + "learning_rate": 4.036036036036036e-05, + "loss": 0.2265, + "step": 886 + }, + { + "epoch": 7.99, + "learning_rate": 4.018018018018018e-05, + "loss": 0.2221, + "step": 887 + }, + { + "epoch": 8.0, + "learning_rate": 4e-05, + "loss": 0.1592, + "step": 888 + }, + { + "epoch": 8.01, + "learning_rate": 3.981981981981982e-05, + "loss": 0.1533, + "step": 889 + }, + { + "epoch": 8.02, + "learning_rate": 3.963963963963964e-05, + "loss": 0.201, + "step": 890 + }, + { + "epoch": 8.03, + "learning_rate": 3.945945945945946e-05, + "loss": 0.2034, + "step": 891 + }, + { + "epoch": 8.04, + "learning_rate": 3.927927927927928e-05, + "loss": 0.1815, + "step": 892 + }, + { + "epoch": 8.05, + "learning_rate": 3.9099099099099105e-05, + "loss": 0.1729, + "step": 893 + }, + { + "epoch": 8.05, + "learning_rate": 3.8918918918918926e-05, + "loss": 0.203, + "step": 894 + }, + { + "epoch": 8.06, + "learning_rate": 3.873873873873874e-05, + "loss": 0.1611, + "step": 895 + }, + { + "epoch": 8.07, + "learning_rate": 3.855855855855856e-05, + "loss": 0.2099, + "step": 896 + }, + { + "epoch": 8.08, + "learning_rate": 3.8378378378378384e-05, + "loss": 0.1435, + "step": 897 + }, + { + "epoch": 8.09, + "learning_rate": 3.81981981981982e-05, + "loss": 0.1857, + "step": 898 + }, + { + "epoch": 8.1, + "learning_rate": 3.801801801801802e-05, + "loss": 0.2327, + "step": 899 + }, + { + "epoch": 8.11, + "learning_rate": 3.783783783783784e-05, + "loss": 0.1875, + "step": 900 + }, + { + "epoch": 8.12, + "learning_rate": 3.765765765765766e-05, + "loss": 0.1824, + "step": 901 + }, + { + "epoch": 8.13, + "learning_rate": 3.747747747747748e-05, + "loss": 0.1671, + "step": 902 + }, + { + "epoch": 8.14, + "learning_rate": 3.72972972972973e-05, + "loss": 0.157, + "step": 903 + }, + { + "epoch": 8.14, + "learning_rate": 3.7117117117117115e-05, + "loss": 0.2169, + "step": 904 + }, + { + "epoch": 8.15, + "learning_rate": 3.693693693693694e-05, + "loss": 0.1918, + "step": 905 + }, + { + "epoch": 8.16, + "learning_rate": 3.675675675675676e-05, + "loss": 0.2088, + "step": 906 + }, + { + "epoch": 8.17, + "learning_rate": 3.657657657657658e-05, + "loss": 0.1751, + "step": 907 + }, + { + "epoch": 8.18, + "learning_rate": 3.63963963963964e-05, + "loss": 0.2023, + "step": 908 + }, + { + "epoch": 8.19, + "learning_rate": 3.621621621621622e-05, + "loss": 0.1897, + "step": 909 + }, + { + "epoch": 8.2, + "learning_rate": 3.603603603603604e-05, + "loss": 0.1792, + "step": 910 + }, + { + "epoch": 8.21, + "learning_rate": 3.585585585585586e-05, + "loss": 0.177, + "step": 911 + }, + { + "epoch": 8.22, + "learning_rate": 3.567567567567568e-05, + "loss": 0.1938, + "step": 912 + }, + { + "epoch": 8.23, + "learning_rate": 3.5495495495495496e-05, + "loss": 0.1987, + "step": 913 + }, + { + "epoch": 8.23, + "learning_rate": 3.531531531531532e-05, + "loss": 0.1757, + "step": 914 + }, + { + "epoch": 8.24, + "learning_rate": 3.513513513513514e-05, + "loss": 0.2028, + "step": 915 + }, + { + "epoch": 8.25, + "learning_rate": 3.4954954954954954e-05, + "loss": 0.1704, + "step": 916 + }, + { + "epoch": 8.26, + "learning_rate": 3.4774774774774776e-05, + "loss": 0.1984, + "step": 917 + }, + { + "epoch": 8.27, + "learning_rate": 3.45945945945946e-05, + "loss": 0.2117, + "step": 918 + }, + { + "epoch": 8.28, + "learning_rate": 3.441441441441441e-05, + "loss": 0.1817, + "step": 919 + }, + { + "epoch": 8.29, + "learning_rate": 3.4234234234234234e-05, + "loss": 0.2177, + "step": 920 + }, + { + "epoch": 8.3, + "learning_rate": 3.4054054054054055e-05, + "loss": 0.1737, + "step": 921 + }, + { + "epoch": 8.31, + "learning_rate": 3.387387387387388e-05, + "loss": 0.1605, + "step": 922 + }, + { + "epoch": 8.32, + "learning_rate": 3.36936936936937e-05, + "loss": 0.1568, + "step": 923 + }, + { + "epoch": 8.32, + "learning_rate": 3.351351351351352e-05, + "loss": 0.1898, + "step": 924 + }, + { + "epoch": 8.33, + "learning_rate": 3.3333333333333335e-05, + "loss": 0.1615, + "step": 925 + }, + { + "epoch": 8.34, + "learning_rate": 3.3153153153153157e-05, + "loss": 0.1867, + "step": 926 + }, + { + "epoch": 8.35, + "learning_rate": 3.297297297297298e-05, + "loss": 0.1879, + "step": 927 + }, + { + "epoch": 8.36, + "learning_rate": 3.279279279279279e-05, + "loss": 0.1822, + "step": 928 + }, + { + "epoch": 8.37, + "learning_rate": 3.2612612612612615e-05, + "loss": 0.1782, + "step": 929 + }, + { + "epoch": 8.38, + "learning_rate": 3.2432432432432436e-05, + "loss": 0.1718, + "step": 930 + }, + { + "epoch": 8.39, + "learning_rate": 3.225225225225225e-05, + "loss": 0.1944, + "step": 931 + }, + { + "epoch": 8.4, + "learning_rate": 3.207207207207207e-05, + "loss": 0.2228, + "step": 932 + }, + { + "epoch": 8.41, + "learning_rate": 3.1891891891891894e-05, + "loss": 0.1929, + "step": 933 + }, + { + "epoch": 8.41, + "learning_rate": 3.171171171171171e-05, + "loss": 0.1967, + "step": 934 + }, + { + "epoch": 8.42, + "learning_rate": 3.153153153153153e-05, + "loss": 0.1918, + "step": 935 + }, + { + "epoch": 8.43, + "learning_rate": 3.135135135135135e-05, + "loss": 0.1658, + "step": 936 + }, + { + "epoch": 8.44, + "learning_rate": 3.1171171171171174e-05, + "loss": 0.1878, + "step": 937 + }, + { + "epoch": 8.45, + "learning_rate": 3.0990990990990995e-05, + "loss": 0.1712, + "step": 938 + }, + { + "epoch": 8.46, + "learning_rate": 3.081081081081082e-05, + "loss": 0.1775, + "step": 939 + }, + { + "epoch": 8.47, + "learning_rate": 3.063063063063063e-05, + "loss": 0.1869, + "step": 940 + }, + { + "epoch": 8.48, + "learning_rate": 3.045045045045045e-05, + "loss": 0.1943, + "step": 941 + }, + { + "epoch": 8.49, + "learning_rate": 3.0270270270270272e-05, + "loss": 0.1863, + "step": 942 + }, + { + "epoch": 8.5, + "learning_rate": 3.009009009009009e-05, + "loss": 0.1746, + "step": 943 + }, + { + "epoch": 8.5, + "learning_rate": 2.990990990990991e-05, + "loss": 0.175, + "step": 944 + }, + { + "epoch": 8.51, + "learning_rate": 2.9729729729729733e-05, + "loss": 0.2085, + "step": 945 + }, + { + "epoch": 8.52, + "learning_rate": 2.9549549549549548e-05, + "loss": 0.1638, + "step": 946 + }, + { + "epoch": 8.53, + "learning_rate": 2.936936936936937e-05, + "loss": 0.2147, + "step": 947 + }, + { + "epoch": 8.54, + "learning_rate": 2.918918918918919e-05, + "loss": 0.1774, + "step": 948 + }, + { + "epoch": 8.55, + "learning_rate": 2.900900900900901e-05, + "loss": 0.179, + "step": 949 + }, + { + "epoch": 8.56, + "learning_rate": 2.882882882882883e-05, + "loss": 0.1927, + "step": 950 + }, + { + "epoch": 8.57, + "learning_rate": 2.8648648648648653e-05, + "loss": 0.1809, + "step": 951 + }, + { + "epoch": 8.58, + "learning_rate": 2.8468468468468467e-05, + "loss": 0.168, + "step": 952 + }, + { + "epoch": 8.59, + "learning_rate": 2.828828828828829e-05, + "loss": 0.1853, + "step": 953 + }, + { + "epoch": 8.59, + "learning_rate": 2.810810810810811e-05, + "loss": 0.2057, + "step": 954 + }, + { + "epoch": 8.6, + "learning_rate": 2.7927927927927926e-05, + "loss": 0.1757, + "step": 955 + }, + { + "epoch": 8.61, + "learning_rate": 2.7747747747747747e-05, + "loss": 0.1706, + "step": 956 + }, + { + "epoch": 8.62, + "learning_rate": 2.7567567567567572e-05, + "loss": 0.1854, + "step": 957 + }, + { + "epoch": 8.63, + "learning_rate": 2.7387387387387387e-05, + "loss": 0.1873, + "step": 958 + }, + { + "epoch": 8.64, + "learning_rate": 2.720720720720721e-05, + "loss": 0.1783, + "step": 959 + }, + { + "epoch": 8.65, + "learning_rate": 2.702702702702703e-05, + "loss": 0.1709, + "step": 960 + }, + { + "epoch": 8.66, + "learning_rate": 2.6846846846846845e-05, + "loss": 0.1766, + "step": 961 + }, + { + "epoch": 8.67, + "learning_rate": 2.6666666666666667e-05, + "loss": 0.21, + "step": 962 + }, + { + "epoch": 8.68, + "learning_rate": 2.6486486486486488e-05, + "loss": 0.196, + "step": 963 + }, + { + "epoch": 8.68, + "learning_rate": 2.6306306306306306e-05, + "loss": 0.2047, + "step": 964 + }, + { + "epoch": 8.69, + "learning_rate": 2.6126126126126128e-05, + "loss": 0.1878, + "step": 965 + }, + { + "epoch": 8.7, + "learning_rate": 2.594594594594595e-05, + "loss": 0.1713, + "step": 966 + }, + { + "epoch": 8.71, + "learning_rate": 2.5765765765765764e-05, + "loss": 0.169, + "step": 967 + }, + { + "epoch": 8.72, + "learning_rate": 2.5585585585585586e-05, + "loss": 0.1749, + "step": 968 + }, + { + "epoch": 8.73, + "learning_rate": 2.5405405405405408e-05, + "loss": 0.1968, + "step": 969 + }, + { + "epoch": 8.74, + "learning_rate": 2.5225225225225222e-05, + "loss": 0.173, + "step": 970 + }, + { + "epoch": 8.75, + "learning_rate": 2.5045045045045044e-05, + "loss": 0.208, + "step": 971 + }, + { + "epoch": 8.76, + "learning_rate": 2.486486486486487e-05, + "loss": 0.1588, + "step": 972 + }, + { + "epoch": 8.77, + "learning_rate": 2.4684684684684687e-05, + "loss": 0.1704, + "step": 973 + }, + { + "epoch": 8.77, + "learning_rate": 2.4504504504504506e-05, + "loss": 0.1907, + "step": 974 + }, + { + "epoch": 8.78, + "learning_rate": 2.4324324324324327e-05, + "loss": 0.1939, + "step": 975 + }, + { + "epoch": 8.79, + "learning_rate": 2.4144144144144145e-05, + "loss": 0.1676, + "step": 976 + }, + { + "epoch": 8.8, + "learning_rate": 2.3963963963963964e-05, + "loss": 0.2301, + "step": 977 + }, + { + "epoch": 8.81, + "learning_rate": 2.3783783783783785e-05, + "loss": 0.1836, + "step": 978 + }, + { + "epoch": 8.82, + "learning_rate": 2.3603603603603607e-05, + "loss": 0.1972, + "step": 979 + }, + { + "epoch": 8.83, + "learning_rate": 2.3423423423423425e-05, + "loss": 0.1865, + "step": 980 + }, + { + "epoch": 8.84, + "learning_rate": 2.3243243243243247e-05, + "loss": 0.168, + "step": 981 + }, + { + "epoch": 8.85, + "learning_rate": 2.3063063063063065e-05, + "loss": 0.1778, + "step": 982 + }, + { + "epoch": 8.86, + "learning_rate": 2.2882882882882883e-05, + "loss": 0.1863, + "step": 983 + }, + { + "epoch": 8.86, + "learning_rate": 2.2702702702702705e-05, + "loss": 0.1549, + "step": 984 + }, + { + "epoch": 8.87, + "learning_rate": 2.2522522522522523e-05, + "loss": 0.1869, + "step": 985 + }, + { + "epoch": 8.88, + "learning_rate": 2.2342342342342344e-05, + "loss": 0.2235, + "step": 986 + }, + { + "epoch": 8.89, + "learning_rate": 2.2162162162162166e-05, + "loss": 0.172, + "step": 987 + }, + { + "epoch": 8.9, + "learning_rate": 2.1981981981981984e-05, + "loss": 0.2134, + "step": 988 + }, + { + "epoch": 8.91, + "learning_rate": 2.1801801801801803e-05, + "loss": 0.1754, + "step": 989 + }, + { + "epoch": 8.92, + "learning_rate": 2.1621621621621624e-05, + "loss": 0.1436, + "step": 990 + }, + { + "epoch": 8.93, + "learning_rate": 2.1441441441441442e-05, + "loss": 0.1933, + "step": 991 + }, + { + "epoch": 8.94, + "learning_rate": 2.126126126126126e-05, + "loss": 0.2173, + "step": 992 + }, + { + "epoch": 8.95, + "learning_rate": 2.1081081081081082e-05, + "loss": 0.1751, + "step": 993 + }, + { + "epoch": 8.95, + "learning_rate": 2.0900900900900904e-05, + "loss": 0.177, + "step": 994 + }, + { + "epoch": 8.96, + "learning_rate": 2.0720720720720722e-05, + "loss": 0.1967, + "step": 995 + }, + { + "epoch": 8.97, + "learning_rate": 2.0540540540540544e-05, + "loss": 0.1582, + "step": 996 + }, + { + "epoch": 8.98, + "learning_rate": 2.0360360360360362e-05, + "loss": 0.1755, + "step": 997 + }, + { + "epoch": 8.99, + "learning_rate": 2.018018018018018e-05, + "loss": 0.1799, + "step": 998 + }, + { + "epoch": 9.0, + "learning_rate": 2e-05, + "loss": 0.2062, + "step": 999 + }, + { + "epoch": 9.01, + "learning_rate": 1.981981981981982e-05, + "loss": 0.1766, + "step": 1000 + }, + { + "epoch": 9.02, + "learning_rate": 1.963963963963964e-05, + "loss": 0.1705, + "step": 1001 + }, + { + "epoch": 9.03, + "learning_rate": 1.9459459459459463e-05, + "loss": 0.1559, + "step": 1002 + }, + { + "epoch": 9.04, + "learning_rate": 1.927927927927928e-05, + "loss": 0.1595, + "step": 1003 + }, + { + "epoch": 9.05, + "learning_rate": 1.90990990990991e-05, + "loss": 0.1711, + "step": 1004 + }, + { + "epoch": 9.05, + "learning_rate": 1.891891891891892e-05, + "loss": 0.1782, + "step": 1005 + }, + { + "epoch": 9.06, + "learning_rate": 1.873873873873874e-05, + "loss": 0.1841, + "step": 1006 + }, + { + "epoch": 9.07, + "learning_rate": 1.8558558558558558e-05, + "loss": 0.1725, + "step": 1007 + }, + { + "epoch": 9.08, + "learning_rate": 1.837837837837838e-05, + "loss": 0.1974, + "step": 1008 + }, + { + "epoch": 9.09, + "learning_rate": 1.81981981981982e-05, + "loss": 0.1765, + "step": 1009 + }, + { + "epoch": 9.1, + "learning_rate": 1.801801801801802e-05, + "loss": 0.1505, + "step": 1010 + }, + { + "epoch": 9.11, + "learning_rate": 1.783783783783784e-05, + "loss": 0.1642, + "step": 1011 + }, + { + "epoch": 9.12, + "learning_rate": 1.765765765765766e-05, + "loss": 0.1547, + "step": 1012 + }, + { + "epoch": 9.13, + "learning_rate": 1.7477477477477477e-05, + "loss": 0.163, + "step": 1013 + }, + { + "epoch": 9.14, + "learning_rate": 1.72972972972973e-05, + "loss": 0.1567, + "step": 1014 + }, + { + "epoch": 9.14, + "learning_rate": 1.7117117117117117e-05, + "loss": 0.1563, + "step": 1015 + }, + { + "epoch": 9.15, + "learning_rate": 1.693693693693694e-05, + "loss": 0.1349, + "step": 1016 + }, + { + "epoch": 9.16, + "learning_rate": 1.675675675675676e-05, + "loss": 0.1765, + "step": 1017 + }, + { + "epoch": 9.17, + "learning_rate": 1.6576576576576578e-05, + "loss": 0.1638, + "step": 1018 + }, + { + "epoch": 9.18, + "learning_rate": 1.6396396396396396e-05, + "loss": 0.16, + "step": 1019 + }, + { + "epoch": 9.19, + "learning_rate": 1.6216216216216218e-05, + "loss": 0.2045, + "step": 1020 + }, + { + "epoch": 9.2, + "learning_rate": 1.6036036036036036e-05, + "loss": 0.1729, + "step": 1021 + }, + { + "epoch": 9.21, + "learning_rate": 1.5855855855855855e-05, + "loss": 0.1687, + "step": 1022 + }, + { + "epoch": 9.22, + "learning_rate": 1.5675675675675676e-05, + "loss": 0.1639, + "step": 1023 + }, + { + "epoch": 9.23, + "learning_rate": 1.5495495495495498e-05, + "loss": 0.1709, + "step": 1024 + }, + { + "epoch": 9.23, + "learning_rate": 1.5315315315315316e-05, + "loss": 0.1806, + "step": 1025 + }, + { + "epoch": 9.24, + "learning_rate": 1.5135135135135136e-05, + "loss": 0.192, + "step": 1026 + }, + { + "epoch": 9.25, + "learning_rate": 1.4954954954954956e-05, + "loss": 0.1636, + "step": 1027 + }, + { + "epoch": 9.26, + "learning_rate": 1.4774774774774774e-05, + "loss": 0.1797, + "step": 1028 + }, + { + "epoch": 9.27, + "learning_rate": 1.4594594594594596e-05, + "loss": 0.1976, + "step": 1029 + }, + { + "epoch": 9.28, + "learning_rate": 1.4414414414414416e-05, + "loss": 0.1657, + "step": 1030 + }, + { + "epoch": 9.29, + "learning_rate": 1.4234234234234234e-05, + "loss": 0.178, + "step": 1031 + }, + { + "epoch": 9.3, + "learning_rate": 1.4054054054054055e-05, + "loss": 0.1709, + "step": 1032 + }, + { + "epoch": 9.31, + "learning_rate": 1.3873873873873874e-05, + "loss": 0.1604, + "step": 1033 + }, + { + "epoch": 9.32, + "learning_rate": 1.3693693693693693e-05, + "loss": 0.1804, + "step": 1034 + }, + { + "epoch": 9.32, + "learning_rate": 1.3513513513513515e-05, + "loss": 0.1661, + "step": 1035 + }, + { + "epoch": 9.33, + "learning_rate": 1.3333333333333333e-05, + "loss": 0.1542, + "step": 1036 + }, + { + "epoch": 9.34, + "learning_rate": 1.3153153153153153e-05, + "loss": 0.1807, + "step": 1037 + }, + { + "epoch": 9.35, + "learning_rate": 1.2972972972972975e-05, + "loss": 0.1976, + "step": 1038 + }, + { + "epoch": 9.36, + "learning_rate": 1.2792792792792793e-05, + "loss": 0.162, + "step": 1039 + }, + { + "epoch": 9.37, + "learning_rate": 1.2612612612612611e-05, + "loss": 0.173, + "step": 1040 + }, + { + "epoch": 9.38, + "learning_rate": 1.2432432432432435e-05, + "loss": 0.1679, + "step": 1041 + }, + { + "epoch": 9.39, + "learning_rate": 1.2252252252252253e-05, + "loss": 0.1732, + "step": 1042 + }, + { + "epoch": 9.4, + "learning_rate": 1.2072072072072073e-05, + "loss": 0.1774, + "step": 1043 + }, + { + "epoch": 9.41, + "learning_rate": 1.1891891891891893e-05, + "loss": 0.1579, + "step": 1044 + }, + { + "epoch": 9.41, + "learning_rate": 1.1711711711711713e-05, + "loss": 0.1631, + "step": 1045 + }, + { + "epoch": 9.42, + "learning_rate": 1.1531531531531532e-05, + "loss": 0.1698, + "step": 1046 + }, + { + "epoch": 9.43, + "learning_rate": 1.1351351351351352e-05, + "loss": 0.1702, + "step": 1047 + }, + { + "epoch": 9.44, + "learning_rate": 1.1171171171171172e-05, + "loss": 0.1565, + "step": 1048 + }, + { + "epoch": 9.45, + "learning_rate": 1.0990990990990992e-05, + "loss": 0.1443, + "step": 1049 + }, + { + "epoch": 9.46, + "learning_rate": 1.0810810810810812e-05, + "loss": 0.1805, + "step": 1050 + }, + { + "epoch": 9.47, + "learning_rate": 1.063063063063063e-05, + "loss": 0.1462, + "step": 1051 + }, + { + "epoch": 9.48, + "learning_rate": 1.0450450450450452e-05, + "loss": 0.1723, + "step": 1052 + }, + { + "epoch": 9.49, + "learning_rate": 1.0270270270270272e-05, + "loss": 0.1684, + "step": 1053 + }, + { + "epoch": 9.5, + "learning_rate": 1.009009009009009e-05, + "loss": 0.1866, + "step": 1054 + }, + { + "epoch": 9.5, + "learning_rate": 9.90990990990991e-06, + "loss": 0.1562, + "step": 1055 + }, + { + "epoch": 9.51, + "learning_rate": 9.729729729729732e-06, + "loss": 0.1614, + "step": 1056 + }, + { + "epoch": 9.52, + "learning_rate": 9.54954954954955e-06, + "loss": 0.1592, + "step": 1057 + }, + { + "epoch": 9.53, + "learning_rate": 9.36936936936937e-06, + "loss": 0.1659, + "step": 1058 + }, + { + "epoch": 9.54, + "learning_rate": 9.18918918918919e-06, + "loss": 0.1632, + "step": 1059 + }, + { + "epoch": 9.55, + "learning_rate": 9.00900900900901e-06, + "loss": 0.1697, + "step": 1060 + }, + { + "epoch": 9.56, + "learning_rate": 8.82882882882883e-06, + "loss": 0.1893, + "step": 1061 + }, + { + "epoch": 9.57, + "learning_rate": 8.64864864864865e-06, + "loss": 0.1768, + "step": 1062 + }, + { + "epoch": 9.58, + "learning_rate": 8.46846846846847e-06, + "loss": 0.1828, + "step": 1063 + }, + { + "epoch": 9.59, + "learning_rate": 8.288288288288289e-06, + "loss": 0.1687, + "step": 1064 + }, + { + "epoch": 9.59, + "learning_rate": 8.108108108108109e-06, + "loss": 0.1662, + "step": 1065 + }, + { + "epoch": 9.6, + "learning_rate": 7.927927927927927e-06, + "loss": 0.1681, + "step": 1066 + }, + { + "epoch": 9.61, + "learning_rate": 7.747747747747749e-06, + "loss": 0.1521, + "step": 1067 + }, + { + "epoch": 9.62, + "learning_rate": 7.567567567567568e-06, + "loss": 0.158, + "step": 1068 + }, + { + "epoch": 9.63, + "learning_rate": 7.387387387387387e-06, + "loss": 0.1679, + "step": 1069 + }, + { + "epoch": 9.64, + "learning_rate": 7.207207207207208e-06, + "loss": 0.158, + "step": 1070 + }, + { + "epoch": 9.65, + "learning_rate": 7.027027027027028e-06, + "loss": 0.1688, + "step": 1071 + }, + { + "epoch": 9.66, + "learning_rate": 6.846846846846847e-06, + "loss": 0.1714, + "step": 1072 + }, + { + "epoch": 9.67, + "learning_rate": 6.666666666666667e-06, + "loss": 0.1693, + "step": 1073 + }, + { + "epoch": 9.68, + "learning_rate": 6.486486486486487e-06, + "loss": 0.1465, + "step": 1074 + }, + { + "epoch": 9.68, + "learning_rate": 6.306306306306306e-06, + "loss": 0.1723, + "step": 1075 + }, + { + "epoch": 9.69, + "learning_rate": 6.126126126126126e-06, + "loss": 0.1704, + "step": 1076 + }, + { + "epoch": 9.7, + "learning_rate": 5.945945945945946e-06, + "loss": 0.143, + "step": 1077 + }, + { + "epoch": 9.71, + "learning_rate": 5.765765765765766e-06, + "loss": 0.1696, + "step": 1078 + }, + { + "epoch": 9.72, + "learning_rate": 5.585585585585586e-06, + "loss": 0.1596, + "step": 1079 + }, + { + "epoch": 9.73, + "learning_rate": 5.405405405405406e-06, + "loss": 0.2013, + "step": 1080 + }, + { + "epoch": 9.74, + "learning_rate": 5.225225225225226e-06, + "loss": 0.191, + "step": 1081 + }, + { + "epoch": 9.75, + "learning_rate": 5.045045045045045e-06, + "loss": 0.177, + "step": 1082 + }, + { + "epoch": 9.76, + "learning_rate": 4.864864864864866e-06, + "loss": 0.1849, + "step": 1083 + }, + { + "epoch": 9.77, + "learning_rate": 4.684684684684685e-06, + "loss": 0.1549, + "step": 1084 + }, + { + "epoch": 9.77, + "learning_rate": 4.504504504504505e-06, + "loss": 0.1875, + "step": 1085 + }, + { + "epoch": 9.78, + "learning_rate": 4.324324324324325e-06, + "loss": 0.1544, + "step": 1086 + }, + { + "epoch": 9.79, + "learning_rate": 4.1441441441441446e-06, + "loss": 0.1816, + "step": 1087 + }, + { + "epoch": 9.8, + "learning_rate": 3.963963963963964e-06, + "loss": 0.1707, + "step": 1088 + }, + { + "epoch": 9.81, + "learning_rate": 3.783783783783784e-06, + "loss": 0.1575, + "step": 1089 + }, + { + "epoch": 9.82, + "learning_rate": 3.603603603603604e-06, + "loss": 0.1607, + "step": 1090 + }, + { + "epoch": 9.83, + "learning_rate": 3.4234234234234234e-06, + "loss": 0.1771, + "step": 1091 + }, + { + "epoch": 9.84, + "learning_rate": 3.2432432432432437e-06, + "loss": 0.1688, + "step": 1092 + }, + { + "epoch": 9.85, + "learning_rate": 3.063063063063063e-06, + "loss": 0.1689, + "step": 1093 + }, + { + "epoch": 9.86, + "learning_rate": 2.882882882882883e-06, + "loss": 0.1711, + "step": 1094 + }, + { + "epoch": 9.86, + "learning_rate": 2.702702702702703e-06, + "loss": 0.1696, + "step": 1095 + }, + { + "epoch": 9.87, + "learning_rate": 2.5225225225225225e-06, + "loss": 0.1863, + "step": 1096 + }, + { + "epoch": 9.88, + "learning_rate": 2.3423423423423424e-06, + "loss": 0.1648, + "step": 1097 + }, + { + "epoch": 9.89, + "learning_rate": 2.1621621621621623e-06, + "loss": 0.1883, + "step": 1098 + }, + { + "epoch": 9.9, + "learning_rate": 1.981981981981982e-06, + "loss": 0.1593, + "step": 1099 + }, + { + "epoch": 9.91, + "learning_rate": 1.801801801801802e-06, + "loss": 0.1602, + "step": 1100 + }, + { + "epoch": 9.92, + "learning_rate": 1.6216216216216219e-06, + "loss": 0.2187, + "step": 1101 + }, + { + "epoch": 9.93, + "learning_rate": 1.4414414414414416e-06, + "loss": 0.1863, + "step": 1102 + }, + { + "epoch": 9.94, + "learning_rate": 1.2612612612612613e-06, + "loss": 0.1656, + "step": 1103 + }, + { + "epoch": 9.95, + "learning_rate": 1.0810810810810812e-06, + "loss": 0.1939, + "step": 1104 + }, + { + "epoch": 9.95, + "learning_rate": 9.00900900900901e-07, + "loss": 0.1407, + "step": 1105 + }, + { + "epoch": 9.96, + "learning_rate": 7.207207207207208e-07, + "loss": 0.1753, + "step": 1106 + }, + { + "epoch": 9.97, + "learning_rate": 5.405405405405406e-07, + "loss": 0.1542, + "step": 1107 + }, + { + "epoch": 9.98, + "learning_rate": 3.603603603603604e-07, + "loss": 0.1609, + "step": 1108 + }, + { + "epoch": 9.99, + "learning_rate": 1.801801801801802e-07, + "loss": 0.1702, + "step": 1109 + }, + { + "epoch": 10.0, + "learning_rate": 0.0, + "loss": 0.1462, + "step": 1110 + }, + { + "epoch": 10.0, + "step": 1110, + "total_flos": 6398509737984000.0, + "train_loss": 0.5453134933838973, + "train_runtime": 627.0712, + "train_samples_per_second": 56.246, + "train_steps_per_second": 1.77 } ], - "max_steps": 333, - "num_train_epochs": 3, - "total_flos": 1915204220928000.0, + "max_steps": 1110, + "num_train_epochs": 10, + "total_flos": 6398509737984000.0, "trial_name": null, "trial_params": null }