{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 19.154929577464788,
  "eval_steps": 100,
  "global_step": 340,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.06, "learning_rate": 5e-06, "loss": 1.5896, "step": 1 },
    { "epoch": 0.11, "learning_rate": 1e-05, "loss": 1.6521, "step": 2 },
    { "epoch": 0.17, "learning_rate": 1.5e-05, "loss": 1.3654, "step": 3 },
    { "epoch": 0.23, "learning_rate": 2e-05, "loss": 1.3569, "step": 4 },
    { "epoch": 0.28, "learning_rate": 2.5e-05, "loss": 1.4352, "step": 5 },
    { "epoch": 0.34, "learning_rate": 3e-05, "loss": 1.425, "step": 6 },
    { "epoch": 0.39, "learning_rate": 3.5e-05, "loss": 1.3529, "step": 7 },
    { "epoch": 0.45, "learning_rate": 4e-05, "loss": 1.384, "step": 8 },
    { "epoch": 0.51, "learning_rate": 4.5e-05, "loss": 1.3881, "step": 9 },
    { "epoch": 0.56, "learning_rate": 5e-05, "loss": 1.5593, "step": 10 },
    { "epoch": 0.62, "learning_rate": 4.999886713385432e-05, "loss": 1.3423, "step": 11 },
    { "epoch": 0.68, "learning_rate": 4.999546863808815e-05, "loss": 1.4725, "step": 12 },
    { "epoch": 0.73, "learning_rate": 4.9989804820704735e-05, "loss": 1.4342, "step": 13 },
    { "epoch": 0.79, "learning_rate": 4.9981876195011844e-05, "loss": 1.3769, "step": 14 },
    { "epoch": 0.85, "learning_rate": 4.99716834795752e-05, "loss": 1.3976, "step": 15 },
    { "epoch": 0.9, "learning_rate": 4.995922759815339e-05, "loss": 1.328, "step": 16 },
    { "epoch": 0.96, "learning_rate": 4.994450967961413e-05, "loss": 1.266, "step": 17 },
    { "epoch": 1.01, "learning_rate": 4.992753105783194e-05, "loss": 1.4671, "step": 18 },
    { "epoch": 1.07, "learning_rate": 4.9908293271567286e-05, "loss": 1.3973, "step": 19 },
    { "epoch": 1.13, "learning_rate": 4.988679806432712e-05, "loss": 1.3835, "step": 20 },
    { "epoch": 1.18, "learning_rate": 4.9863047384206835e-05, "loss": 1.2203, "step": 21 },
    { "epoch": 1.24, "learning_rate": 4.9837043383713753e-05, "loss": 1.3752, "step": 22 },
    { "epoch": 1.3, "learning_rate": 4.980878841957203e-05, "loss": 1.4142, "step": 23 },
    { "epoch": 1.35, "learning_rate": 4.977828505250903e-05, "loss": 1.3657, "step": 24 },
    { "epoch": 1.41, "learning_rate": 4.9745536047023324e-05, "loss": 1.309, "step": 25 },
    { "epoch": 1.46, "learning_rate": 4.971054437113406e-05, "loss": 1.3004, "step": 26 },
    { "epoch": 1.52, "learning_rate": 4.967331319611206e-05, "loss": 1.3309, "step": 27 },
    { "epoch": 1.58, "learning_rate": 4.963384589619233e-05, "loss": 1.4956, "step": 28 },
    { "epoch": 1.63, "learning_rate": 4.959214604826831e-05, "loss": 1.2682, "step": 29 },
    { "epoch": 1.69, "learning_rate": 4.9548217431567665e-05, "loss": 1.391, "step": 30 },
    { "epoch": 1.75, "learning_rate": 4.9502064027309836e-05, "loss": 1.3102, "step": 31 },
    { "epoch": 1.8, "learning_rate": 4.9453690018345144e-05, "loss": 1.3891, "step": 32 },
    { "epoch": 1.86, "learning_rate": 4.9403099788775754e-05, "loss": 1.3538, "step": 33 },
    { "epoch": 1.92, "learning_rate": 4.935029792355834e-05, "loss": 1.3443, "step": 34 },
    { "epoch": 1.97, "learning_rate": 4.929528920808854e-05, "loss": 1.3384, "step": 35 },
    { "epoch": 2.03, "learning_rate": 4.923807862776728e-05, "loss": 1.2968, "step": 36 },
    { "epoch": 2.08, "learning_rate": 4.917867136754893e-05, "loss": 1.2624, "step": 37 },
    { "epoch": 2.14, "learning_rate": 4.91170728114714e-05, "loss": 1.3547, "step": 38 },
    { "epoch": 2.2, "learning_rate": 4.9053288542168185e-05, "loss": 1.3857, "step": 39 },
    { "epoch": 2.25, "learning_rate": 4.898732434036244e-05, "loss": 1.2261, "step": 40 },
    { "epoch": 2.31, "learning_rate": 4.8919186184343046e-05, "loss": 1.4072, "step": 41 },
    { "epoch": 2.37, "learning_rate": 4.8848880249422815e-05, "loss": 1.3283, "step": 42 },
    { "epoch": 2.42, "learning_rate": 4.877641290737884e-05, "loss": 1.2464, "step": 43 },
    { "epoch": 2.48, "learning_rate": 4.870179072587499e-05, "loss": 1.3433, "step": 44 },
    { "epoch": 2.54, "learning_rate": 4.862502046786671e-05, "loss": 1.2861, "step": 45 },
    { "epoch": 2.59, "learning_rate": 4.854610909098812e-05, "loss": 1.1888, "step": 46 },
    { "epoch": 2.65, "learning_rate": 4.8465063746921395e-05, "loss": 1.2509, "step": 47 },
    { "epoch": 2.7, "learning_rate": 4.838189178074867e-05, "loss": 1.4449, "step": 48 },
    { "epoch": 2.76, "learning_rate": 4.829660073028631e-05, "loss": 1.4645, "step": 49 },
    { "epoch": 2.82, "learning_rate": 4.8209198325401815e-05, "loss": 1.3586, "step": 50 },
    { "epoch": 2.87, "learning_rate": 4.811969248731323e-05, "loss": 1.3542, "step": 51 },
    { "epoch": 2.93, "learning_rate": 4.802809132787125e-05, "loss": 1.3145, "step": 52 },
    { "epoch": 2.99, "learning_rate": 4.793440314882408e-05, "loss": 1.2343, "step": 53 },
    { "epoch": 3.04, "learning_rate": 4.783863644106502e-05, "loss": 1.2946, "step": 54 },
    { "epoch": 3.1, "learning_rate": 4.774079988386296e-05, "loss": 1.2621, "step": 55 },
    { "epoch": 3.15, "learning_rate": 4.764090234407577e-05, "loss": 1.3581, "step": 56 },
    { "epoch": 3.21, "learning_rate": 4.753895287534673e-05, "loss": 1.3891, "step": 57 },
    { "epoch": 3.27, "learning_rate": 4.743496071728396e-05, "loss": 1.334, "step": 58 },
    { "epoch": 3.32, "learning_rate": 4.73289352946231e-05, "loss": 1.3105, "step": 59 },
    { "epoch": 3.38, "learning_rate": 4.722088621637309e-05, "loss": 1.2921, "step": 60 },
    { "epoch": 3.44, "learning_rate": 4.711082327494536e-05, "loss": 1.2406, "step": 61 },
    { "epoch": 3.49, "learning_rate": 4.6998756445266336e-05, "loss": 1.4246, "step": 62 },
    { "epoch": 3.55, "learning_rate": 4.688469588387339e-05, "loss": 1.2126, "step": 63 },
    { "epoch": 3.61, "learning_rate": 4.6768651927994434e-05, "loss": 1.3662, "step": 64 },
    { "epoch": 3.66, "learning_rate": 4.665063509461097e-05, "loss": 1.2044, "step": 65 },
    { "epoch": 3.72, "learning_rate": 4.653065607950502e-05, "loss": 1.4344, "step": 66 },
    { "epoch": 3.77, "learning_rate": 4.640872575628973e-05, "loss": 1.3652, "step": 67 },
    { "epoch": 3.83, "learning_rate": 4.628485517542392e-05, "loss": 1.231, "step": 68 },
    { "epoch": 3.89, "learning_rate": 4.6159055563210604e-05, "loss": 1.1948, "step": 69 },
    { "epoch": 3.94, "learning_rate": 4.6031338320779534e-05, "loss": 1.2484, "step": 70 },
    { "epoch": 4.0, "learning_rate": 4.59017150230539e-05, "loss": 1.2968, "step": 71 },
    { "epoch": 4.06, "learning_rate": 4.5770197417701365e-05, "loss": 1.1691, "step": 72 },
    { "epoch": 4.11, "learning_rate": 4.563679742406935e-05, "loss": 1.224, "step": 73 },
    { "epoch": 4.17, "learning_rate": 4.550152713210478e-05, "loss": 1.2669, "step": 74 },
    { "epoch": 4.23, "learning_rate": 4.5364398801258396e-05, "loss": 1.3218, "step": 75 },
    { "epoch": 4.28, "learning_rate": 4.522542485937369e-05, "loss": 1.3309, "step": 76 },
    { "epoch": 4.34, "learning_rate": 4.508461790156056e-05, "loss": 1.3356, "step": 77 },
    { "epoch": 4.39, "learning_rate": 4.4941990689053886e-05, "loss": 1.3367, "step": 78 },
    { "epoch": 4.45, "learning_rate": 4.479755614805688e-05, "loss": 1.3568, "step": 79 },
    { "epoch": 4.51, "learning_rate": 4.465132736856969e-05, "loss": 1.29, "step": 80 },
    { "epoch": 4.56, "learning_rate": 4.450331760320302e-05, "loss": 1.3522, "step": 81 },
    { "epoch": 4.62, "learning_rate": 4.4353540265977064e-05, "loss": 1.3021, "step": 82 },
    { "epoch": 4.68, "learning_rate": 4.4202008931105795e-05, "loss": 1.303, "step": 83 },
    { "epoch": 4.73, "learning_rate": 4.404873733176678e-05, "loss": 1.313, "step": 84 },
    { "epoch": 4.79, "learning_rate": 4.389373935885646e-05, "loss": 1.231, "step": 85 },
    { "epoch": 4.85, "learning_rate": 4.373702905973135e-05, "loss": 1.2908, "step": 86 },
    { "epoch": 4.9, "learning_rate": 4.357862063693486e-05, "loss": 1.2575, "step": 87 },
    { "epoch": 4.96, "learning_rate": 4.341852844691012e-05, "loss": 1.1942, "step": 88 },
    { "epoch": 5.01, "learning_rate": 4.3256766998698936e-05, "loss": 1.1607, "step": 89 },
    { "epoch": 5.07, "learning_rate": 4.309335095262676e-05, "loss": 1.3386, "step": 90 },
    { "epoch": 5.13, "learning_rate": 4.292829511897409e-05, "loss": 1.371, "step": 91 },
    { "epoch": 5.18, "learning_rate": 4.276161445663423e-05, "loss": 1.236, "step": 92 },
    { "epoch": 5.24, "learning_rate": 4.259332407175751e-05, "loss": 1.2531, "step": 93 },
    { "epoch": 5.3, "learning_rate": 4.242343921638234e-05, "loss": 1.2593, "step": 94 },
    { "epoch": 5.35, "learning_rate": 4.2251975287052804e-05, "loss": 1.1245, "step": 95 },
    { "epoch": 5.41, "learning_rate": 4.2078947823423364e-05, "loss": 1.2538, "step": 96 },
    { "epoch": 5.46, "learning_rate": 4.1904372506850484e-05, "loss": 1.2847, "step": 97 },
    { "epoch": 5.52, "learning_rate": 4.172826515897146e-05, "loss": 1.2071, "step": 98 },
    { "epoch": 5.58, "learning_rate": 4.155064174027047e-05, "loss": 1.2664, "step": 99 },
    { "epoch": 5.63, "learning_rate": 4.137151834863213e-05, "loss": 1.1919, "step": 100 },
    { "epoch": 5.63, "eval_loss": 1.2261024713516235, "eval_runtime": 1.4353, "eval_samples_per_second": 6.967, "eval_steps_per_second": 3.484, "step": 100 },
    { "epoch": 5.69, "learning_rate": 4.119091121788256e-05, "loss": 1.1509, "step": 101 },
    { "epoch": 5.75, "learning_rate": 4.100883671631806e-05, "loss": 1.3013, "step": 102 },
    { "epoch": 5.8, "learning_rate": 4.082531134522176e-05, "loss": 1.313, "step": 103 },
    { "epoch": 5.86, "learning_rate": 4.064035173736804e-05, "loss": 1.2176, "step": 104 },
    { "epoch": 5.92, "learning_rate": 4.045397465551513e-05, "loss": 1.1004, "step": 105 },
    { "epoch": 5.97, "learning_rate": 4.0266196990885955e-05, "loss": 1.3575, "step": 106 },
    { "epoch": 6.03, "learning_rate": 4.007703576163724e-05, "loss": 1.2923, "step": 107 },
    { "epoch": 6.08, "learning_rate": 3.98865081113172e-05, "loss": 1.2083, "step": 108 },
    { "epoch": 6.14, "learning_rate": 3.969463130731183e-05, "loss": 1.2141, "step": 109 },
    { "epoch": 6.2, "learning_rate": 3.9501422739279956e-05, "loss": 1.3297, "step": 110 },
    { "epoch": 6.25, "learning_rate": 3.9306899917577245e-05, "loss": 1.0901, "step": 111 },
    { "epoch": 6.31, "learning_rate": 3.911108047166924e-05, "loss": 1.2235, "step": 112 },
    { "epoch": 6.37, "learning_rate": 3.89139821485336e-05, "loss": 1.341, "step": 113 },
    { "epoch": 6.42, "learning_rate": 3.871562281105175e-05, "loss": 1.362, "step": 114 },
    { "epoch": 6.48, "learning_rate": 3.851602043638994e-05, "loss": 1.2738, "step": 115 },
    { "epoch": 6.54, "learning_rate": 3.8315193114369996e-05, "loss": 1.2264, "step": 116 },
    { "epoch": 6.59, "learning_rate": 3.8113159045829864e-05, "loss": 1.171, "step": 117 },
    { "epoch": 6.65, "learning_rate": 3.790993654097405e-05, "loss": 1.1848, "step": 118 },
    { "epoch": 6.7, "learning_rate": 3.770554401771423e-05, "loss": 1.1545, "step": 119 },
    { "epoch": 6.76, "learning_rate": 3.7500000000000003e-05, "loss": 1.2799, "step": 120 },
    { "epoch": 6.82, "learning_rate": 3.72933231161401e-05, "loss": 1.2278, "step": 121 },
    { "epoch": 6.87, "learning_rate": 3.708553209711409e-05, "loss": 1.1959, "step": 122 },
    { "epoch": 6.93, "learning_rate": 3.6876645774874876e-05, "loss": 1.1151, "step": 123 },
    { "epoch": 6.99, "learning_rate": 3.6666683080641846e-05, "loss": 1.2178, "step": 124 },
    { "epoch": 7.04, "learning_rate": 3.645566304318526e-05, "loss": 1.1584, "step": 125 },
    { "epoch": 7.1, "learning_rate": 3.624360478710165e-05, "loss": 1.0959, "step": 126 },
    { "epoch": 7.15, "learning_rate": 3.603052753108053e-05, "loss": 1.3026, "step": 127 },
    { "epoch": 7.21, "learning_rate": 3.581645058616271e-05, "loss": 1.2007, "step": 128 },
    { "epoch": 7.27, "learning_rate": 3.5601393353990046e-05, "loss": 1.2242, "step": 129 },
    { "epoch": 7.32, "learning_rate": 3.5385375325047166e-05, "loss": 1.2737, "step": 130 },
    { "epoch": 7.38, "learning_rate": 3.516841607689501e-05, "loss": 1.2724, "step": 131 },
    { "epoch": 7.44, "learning_rate": 3.495053527239656e-05, "loss": 1.1916, "step": 132 },
    { "epoch": 7.49, "learning_rate": 3.4731752657934794e-05, "loss": 1.2107, "step": 133 },
    { "epoch": 7.55, "learning_rate": 3.4512088061623075e-05, "loss": 1.246, "step": 134 },
    { "epoch": 7.61, "learning_rate": 3.4291561391508185e-05, "loss": 1.1841, "step": 135 },
    { "epoch": 7.66, "learning_rate": 3.4070192633766025e-05, "loss": 1.1898, "step": 136 },
    { "epoch": 7.72, "learning_rate": 3.3848001850890296e-05, "loss": 1.1195, "step": 137 },
    { "epoch": 7.77, "learning_rate": 3.3625009179874265e-05, "loss": 1.1685, "step": 138 },
    { "epoch": 7.83, "learning_rate": 3.3401234830385756e-05, "loss": 1.2748, "step": 139 },
    { "epoch": 7.89, "learning_rate": 3.3176699082935545e-05, "loss": 1.1697, "step": 140 },
    { "epoch": 7.94, "learning_rate": 3.295142228703938e-05, "loss": 1.1624, "step": 141 },
    { "epoch": 8.0, "learning_rate": 3.272542485937369e-05, "loss": 1.1679, "step": 142 },
    { "epoch": 8.06, "learning_rate": 3.249872728192527e-05, "loss": 1.1163, "step": 143 },
    { "epoch": 8.11, "learning_rate": 3.2271350100134975e-05, "loss": 1.1319, "step": 144 },
    { "epoch": 8.17, "learning_rate": 3.2043313921035743e-05, "loss": 1.2151, "step": 145 },
    { "epoch": 8.23, "learning_rate": 3.181463941138495e-05, "loss": 1.1339, "step": 146 },
    { "epoch": 8.28, "learning_rate": 3.158534729579142e-05, "loss": 1.1503, "step": 147 },
    { "epoch": 8.34, "learning_rate": 3.135545835483718e-05, "loss": 1.2326, "step": 148 },
    { "epoch": 8.39, "learning_rate": 3.11249934231941e-05, "loss": 1.2938, "step": 149 },
    { "epoch": 8.45, "learning_rate": 3.0893973387735687e-05, "loss": 1.1454, "step": 150 },
    { "epoch": 8.51, "learning_rate": 3.0662419185644115e-05, "loss": 1.2504, "step": 151 },
    { "epoch": 8.56, "learning_rate": 3.0430351802512698e-05, "loss": 0.987, "step": 152 },
    { "epoch": 8.62, "learning_rate": 3.0197792270443982e-05, "loss": 1.1387, "step": 153 },
    { "epoch": 8.68, "learning_rate": 2.996476166614364e-05, "loss": 1.1142, "step": 154 },
    { "epoch": 8.73, "learning_rate": 2.9731281109010256e-05, "loss": 1.1714, "step": 155 },
    { "epoch": 8.79, "learning_rate": 2.9497371759221347e-05, "loss": 1.1858, "step": 156 },
    { "epoch": 8.85, "learning_rate": 2.92630548158156e-05, "loss": 1.2169, "step": 157 },
    { "epoch": 8.9, "learning_rate": 2.9028351514771606e-05, "loss": 1.2204, "step": 158 },
    { "epoch": 8.96, "learning_rate": 2.8793283127083292e-05, "loss": 1.0313, "step": 159 },
    { "epoch": 9.01, "learning_rate": 2.8557870956832132e-05, "loss": 1.2432, "step": 160 },
    { "epoch": 9.07, "learning_rate": 2.8322136339256356e-05, "loss": 1.0614, "step": 161 },
    { "epoch": 9.13, "learning_rate": 2.808610063881737e-05, "loss": 1.1682, "step": 162 },
    { "epoch": 9.18, "learning_rate": 2.7849785247263515e-05, "loss": 1.0238, "step": 163 },
    { "epoch": 9.24, "learning_rate": 2.761321158169134e-05, "loss": 1.166, "step": 164 },
    { "epoch": 9.3, "learning_rate": 2.7376401082604564e-05, "loss": 1.0965, "step": 165 },
    { "epoch": 9.35, "learning_rate": 2.7139375211970996e-05, "loss": 1.096, "step": 166 },
    { "epoch": 9.41, "learning_rate": 2.6902155451277377e-05, "loss": 1.1161, "step": 167 },
    { "epoch": 9.46, "learning_rate": 2.6664763299582602e-05, "loss": 1.1328, "step": 168 },
    { "epoch": 9.52, "learning_rate": 2.6427220271569203e-05, "loss": 1.0747, "step": 169 },
    { "epoch": 9.58, "learning_rate": 2.6189547895593562e-05, "loss": 1.1092, "step": 170 },
    { "epoch": 9.63, "learning_rate": 2.5951767711734753e-05, "loss": 1.1624, "step": 171 },
    { "epoch": 9.69, "learning_rate": 2.5713901269842404e-05, "loss": 1.1434, "step": 172 },
    { "epoch": 9.75, "learning_rate": 2.5475970127583666e-05, "loss": 1.1167, "step": 173 },
    { "epoch": 9.8, "learning_rate": 2.523799584848942e-05, "loss": 1.0509, "step": 174 },
    { "epoch": 9.86, "learning_rate": 2.5e-05, "loss": 1.1449, "step": 175 },
    { "epoch": 9.92, "learning_rate": 2.4762004151510584e-05, "loss": 1.109, "step": 176 },
    { "epoch": 9.97, "learning_rate": 2.4524029872416333e-05, "loss": 1.0272, "step": 177 },
    { "epoch": 10.03, "learning_rate": 2.42860987301576e-05, "loss": 1.1489, "step": 178 },
    { "epoch": 10.08, "learning_rate": 2.4048232288265253e-05, "loss": 1.0263, "step": 179 },
    { "epoch": 10.14, "learning_rate": 2.3810452104406444e-05, "loss": 1.0305, "step": 180 },
    { "epoch": 10.2, "learning_rate": 2.35727797284308e-05, "loss": 0.9612, "step": 181 },
    { "epoch": 10.25, "learning_rate": 2.3335236700417404e-05, "loss": 1.1883, "step": 182 },
    { "epoch": 10.31, "learning_rate": 2.309784454872262e-05, "loss": 1.192, "step": 183 },
    { "epoch": 10.37, "learning_rate": 2.2860624788029013e-05, "loss": 1.0919, "step": 184 },
    { "epoch": 10.42, "learning_rate": 2.2623598917395438e-05, "loss": 1.0974, "step": 185 },
    { "epoch": 10.48, "learning_rate": 2.238678841830867e-05, "loss": 1.0938, "step": 186 },
    { "epoch": 10.54, "learning_rate": 2.2150214752736488e-05, "loss": 0.9879, "step": 187 },
    { "epoch": 10.59, "learning_rate": 2.1913899361182632e-05, "loss": 1.0284, "step": 188 },
    { "epoch": 10.65, "learning_rate": 2.167786366074365e-05, "loss": 1.0184, "step": 189 },
    { "epoch": 10.7, "learning_rate": 2.1442129043167874e-05, "loss": 1.0419, "step": 190 },
    { "epoch": 10.76, "learning_rate": 2.1206716872916714e-05, "loss": 1.0631, "step": 191 },
    { "epoch": 10.82, "learning_rate": 2.09716484852284e-05, "loss": 1.0492, "step": 192 },
    { "epoch": 10.87, "learning_rate": 2.0736945184184405e-05, "loss": 1.0924, "step": 193 },
    { "epoch": 10.93, "learning_rate": 2.0502628240778655e-05, "loss": 0.9832, "step": 194 },
    { "epoch": 10.99, "learning_rate": 2.0268718890989753e-05, "loss": 1.1266, "step": 195 },
    { "epoch": 11.04, "learning_rate": 2.003523833385637e-05, "loss": 1.0762, "step": 196 },
    { "epoch": 11.1, "learning_rate": 1.980220772955602e-05, "loss": 1.0011, "step": 197 },
    { "epoch": 11.15, "learning_rate": 1.956964819748731e-05, "loss": 1.0195, "step": 198 },
    { "epoch": 11.21, "learning_rate": 1.9337580814355888e-05, "loss": 1.0442, "step": 199 },
    { "epoch": 11.27, "learning_rate": 1.9106026612264316e-05, "loss": 1.0285, "step": 200 },
    { "epoch": 11.27, "eval_loss": 1.3292417526245117, "eval_runtime": 1.4356, "eval_samples_per_second": 6.966, "eval_steps_per_second": 3.483, "step": 200 },
    { "epoch": 11.32, "learning_rate": 1.8875006576805914e-05, "loss": 0.9805, "step": 201 },
    { "epoch": 11.38, "learning_rate": 1.8644541645162834e-05, "loss": 0.9562, "step": 202 },
    { "epoch": 11.44, "learning_rate": 1.8414652704208583e-05, "loss": 1.0457, "step": 203 },
    { "epoch": 11.49, "learning_rate": 1.8185360588615058e-05, "loss": 1.0217, "step": 204 },
    { "epoch": 11.55, "learning_rate": 1.795668607896426e-05, "loss": 1.1281, "step": 205 },
    { "epoch": 11.61, "learning_rate": 1.7728649899865024e-05, "loss": 0.9654, "step": 206 },
    { "epoch": 11.66, "learning_rate": 1.7501272718074737e-05, "loss": 0.9011, "step": 207 },
    { "epoch": 11.72, "learning_rate": 1.7274575140626318e-05, "loss": 1.097, "step": 208 },
    { "epoch": 11.77, "learning_rate": 1.7048577712960627e-05, "loss": 1.0652, "step": 209 },
    { "epoch": 11.83, "learning_rate": 1.682330091706446e-05, "loss": 1.011, "step": 210 },
    { "epoch": 11.89, "learning_rate": 1.6598765169614243e-05, "loss": 1.1001, "step": 211 },
    { "epoch": 11.94, "learning_rate": 1.637499082012574e-05, "loss": 1.1224, "step": 212 },
    { "epoch": 12.0, "learning_rate": 1.615199814910971e-05, "loss": 0.967, "step": 213 },
    { "epoch": 12.06, "learning_rate": 1.5929807366233977e-05, "loss": 0.9923, "step": 214 },
    { "epoch": 12.11, "learning_rate": 1.5708438608491814e-05, "loss": 1.0722, "step": 215 },
    { "epoch": 12.17, "learning_rate": 1.5487911938376924e-05, "loss": 0.9666, "step": 216 },
    { "epoch": 12.23, "learning_rate": 1.5268247342065215e-05, "loss": 0.9136, "step": 217 },
    { "epoch": 12.28, "learning_rate": 1.5049464727603452e-05, "loss": 0.9274, "step": 218 },
    { "epoch": 12.34, "learning_rate": 1.4831583923104999e-05, "loss": 1.0955, "step": 219 },
    { "epoch": 12.39, "learning_rate": 1.4614624674952842e-05, "loss": 0.9058, "step": 220 },
    { "epoch": 12.45, "learning_rate": 1.4398606646009963e-05, "loss": 0.8351, "step": 221 },
    { "epoch": 12.51, "learning_rate": 1.4183549413837289e-05, "loss": 1.119, "step": 222 },
    { "epoch": 12.56, "learning_rate": 1.3969472468919461e-05, "loss": 1.0418, "step": 223 },
    { "epoch": 12.62, "learning_rate": 1.3756395212898359e-05, "loss": 0.9102, "step": 224 },
    { "epoch": 12.68, "learning_rate": 1.354433695681474e-05, "loss": 0.997, "step": 225 },
    { "epoch": 12.73, "learning_rate": 1.3333316919358157e-05, "loss": 1.1737, "step": 226 },
    { "epoch": 12.79, "learning_rate": 1.3123354225125128e-05, "loss": 0.9887, "step": 227 },
    { "epoch": 12.85, "learning_rate": 1.2914467902885901e-05, "loss": 0.9984, "step": 228 },
    { "epoch": 12.9, "learning_rate": 1.2706676883859903e-05, "loss": 0.9507, "step": 229 },
    { "epoch": 12.96, "learning_rate": 1.2500000000000006e-05, "loss": 0.9395, "step": 230 },
    { "epoch": 13.01, "learning_rate": 1.2294455982285777e-05, "loss": 1.1445, "step": 231 },
    { "epoch": 13.07, "learning_rate": 1.2090063459025955e-05, "loss": 0.8469, "step": 232 },
    { "epoch": 13.13, "learning_rate": 1.1886840954170142e-05, "loss": 0.9936, "step": 233 },
    { "epoch": 13.18, "learning_rate": 1.1684806885630004e-05, "loss": 0.8797, "step": 234 },
    { "epoch": 13.24, "learning_rate": 1.148397956361007e-05, "loss": 1.1331, "step": 235 },
    { "epoch": 13.3, "learning_rate": 1.1284377188948258e-05, "loss": 0.9613, "step": 236 },
    { "epoch": 13.35, "learning_rate": 1.1086017851466413e-05, "loss": 0.9343, "step": 237 },
    { "epoch": 13.41, "learning_rate": 1.0888919528330777e-05, "loss": 1.0098, "step": 238 },
    { "epoch": 13.46, "learning_rate": 1.0693100082422763e-05, "loss": 0.9669, "step": 239 },
    { "epoch": 13.52, "learning_rate": 1.049857726072005e-05, "loss": 0.9686, "step": 240 },
    { "epoch": 13.58, "learning_rate": 1.0305368692688174e-05, "loss": 0.8916, "step": 241 },
    { "epoch": 13.63, "learning_rate": 1.0113491888682802e-05, "loss": 0.9274, "step": 242 },
    { "epoch": 13.69, "learning_rate": 9.922964238362762e-06, "loss": 0.9307, "step": 243 },
    { "epoch": 13.75, "learning_rate": 9.733803009114045e-06, "loss": 1.0112, "step": 244 },
    { "epoch": 13.8, "learning_rate": 9.546025344484869e-06, "loss": 1.0465, "step": 245 },
    { "epoch": 13.86, "learning_rate": 9.359648262631961e-06, "loss": 1.0822, "step": 246 },
    { "epoch": 13.92, "learning_rate": 9.174688654778243e-06, "loss": 0.8992, "step": 247 },
    { "epoch": 13.97, "learning_rate": 8.991163283681944e-06, "loss": 0.8778, "step": 248 },
    { "epoch": 14.03, "learning_rate": 8.809088782117452e-06, "loss": 1.0079, "step": 249 },
    { "epoch": 14.08, "learning_rate": 8.628481651367876e-06, "loss": 0.9508, "step": 250 },
    { "epoch": 14.14, "learning_rate": 8.449358259729537e-06, "loss": 0.997, "step": 251 },
    { "epoch": 14.2, "learning_rate": 8.271734841028553e-06, "loss": 0.8669, "step": 252 },
    { "epoch": 14.25, "learning_rate": 8.09562749314952e-06, "loss": 0.9721, "step": 253 },
    { "epoch": 14.31, "learning_rate": 7.921052176576644e-06, "loss": 1.0777, "step": 254 },
    { "epoch": 14.37, "learning_rate": 7.748024712947205e-06, "loss": 0.8644, "step": 255 },
    { "epoch": 14.42, "learning_rate": 7.576560783617668e-06, "loss": 0.9119, "step": 256 },
    { "epoch": 14.48, "learning_rate": 7.40667592824249e-06, "loss": 0.8645, "step": 257 },
    { "epoch": 14.54, "learning_rate": 7.238385543365783e-06, "loss": 0.8786, "step": 258 },
    { "epoch": 14.59, "learning_rate": 7.071704881025915e-06, "loss": 0.9539, "step": 259 },
    { "epoch": 14.65, "learning_rate": 6.906649047373246e-06, "loss": 0.9521, "step": 260 },
    { "epoch": 14.7, "learning_rate": 6.743233001301067e-06, "loss": 1.0708, "step": 261 },
    { "epoch": 14.76, "learning_rate": 6.5814715530898745e-06, "loss": 1.033, "step": 262 },
    { "epoch": 14.82, "learning_rate": 6.421379363065142e-06, "loss": 0.9344, "step": 263 },
    { "epoch": 14.87, "learning_rate": 6.2629709402686535e-06, "loss": 0.9491, "step": 264 },
    { "epoch": 14.93, "learning_rate": 6.106260641143546e-06, "loss": 0.8894, "step": 265 },
    { "epoch": 14.99, "learning_rate": 5.951262668233232e-06, "loss": 0.8825, "step": 266 },
    { "epoch": 15.04, "learning_rate": 5.797991068894201e-06, "loss": 0.925, "step": 267 },
    { "epoch": 15.1, "learning_rate": 5.646459734022938e-06, "loss": 0.8431, "step": 268 },
    { "epoch": 15.15, "learning_rate": 5.49668239679699e-06, "loss": 1.036, "step": 269 },
    { "epoch": 15.21, "learning_rate": 5.348672631430318e-06, "loss": 0.9463, "step": 270 },
    { "epoch": 15.27, "learning_rate": 5.202443851943126e-06, "loss": 0.9942, "step": 271 },
    { "epoch": 15.32, "learning_rate": 5.058009310946119e-06, "loss": 0.9657, "step": 272 },
    { "epoch": 15.38, "learning_rate": 4.915382098439436e-06, "loss": 0.9375, "step": 273 },
    { "epoch": 15.44, "learning_rate": 4.7745751406263165e-06, "loss": 0.9347, "step": 274 },
    { "epoch": 15.49, "learning_rate": 4.6356011987416075e-06, "loss": 0.9796, "step": 275 },
    { "epoch": 15.55, "learning_rate": 4.498472867895223e-06, "loss": 0.987, "step": 276 },
    { "epoch": 15.61, "learning_rate": 4.36320257593065e-06, "loss": 0.8946, "step": 277 },
    { "epoch": 15.66, "learning_rate": 4.229802582298633e-06, "loss": 0.9034, "step": 278 },
    { "epoch": 15.72, "learning_rate": 4.098284976946101e-06, "loss": 1.0294, "step": 279 },
    { "epoch": 15.77, "learning_rate": 3.968661679220468e-06, "loss": 0.9631, "step": 280 },
    { "epoch": 15.83, "learning_rate": 3.840944436789393e-06, "loss": 0.8831, "step": 281 },
    { "epoch": 15.89, "learning_rate": 3.715144824576078e-06, "loss": 0.9274, "step": 282 },
    { "epoch": 15.94, "learning_rate": 3.591274243710277e-06, "loss": 0.8896, "step": 283 },
    { "epoch": 16.0, "learning_rate": 3.4693439204949858e-06, "loss": 0.8672, "step": 284 },
    { "epoch": 16.06, "learning_rate": 3.3493649053890326e-06, "loss": 0.9043, "step": 285 },
    { "epoch": 16.11, "learning_rate": 3.2313480720055745e-06, "loss": 1.1059, "step": 286 },
    { "epoch": 16.17, "learning_rate": 3.1153041161266134e-06, "loss": 0.9241, "step": 287 },
    { "epoch": 16.23, "learning_rate": 3.0012435547336737e-06, "loss": 0.7761, "step": 288 },
    { "epoch": 16.28, "learning_rate": 2.889176725054643e-06, "loss": 1.0323, "step": 289 },
    { "epoch": 16.34, "learning_rate": 2.779113783626916e-06, "loss": 0.8534, "step": 290 },
    { "epoch": 16.39, "learning_rate": 2.671064705376905e-06, "loss": 0.7669, "step": 291 },
    { "epoch": 16.45, "learning_rate": 2.565039282716045e-06, "loss": 0.984, "step": 292 },
    { "epoch": 16.51, "learning_rate": 2.461047124653279e-06, "loss": 0.9542, "step": 293 },
    { "epoch": 16.56, "learning_rate": 2.3590976559242278e-06, "loss": 0.9439, "step": 294 },
    { "epoch": 16.62, "learning_rate": 2.2592001161370392e-06, "loss": 0.8776, "step": 295 },
    { "epoch": 16.68, "learning_rate": 2.1613635589349756e-06, "loss": 0.9211, "step": 296 },
    { "epoch": 16.73, "learning_rate": 2.0655968511759134e-06, "loss": 0.9145, "step": 297 },
    { "epoch": 16.79, "learning_rate": 1.97190867212875e-06, "loss": 0.9481, "step": 298 },
    { "epoch": 16.85, "learning_rate": 1.8803075126867715e-06, "loss": 0.9805, "step": 299 },
    { "epoch": 16.9, "learning_rate": 1.790801674598186e-06, "loss": 0.8387, "step": 300 },
    { "epoch": 16.9, "eval_loss": 1.3825798034667969, "eval_runtime": 1.4357, "eval_samples_per_second": 6.965, "eval_steps_per_second": 3.483, "step": 300 },
    { "epoch": 16.96, "learning_rate": 1.703399269713693e-06, "loss": 1.005, "step": 301 },
    { "epoch": 17.01, "learning_rate": 1.6181082192513353e-06, "loss": 0.9032, "step": 302 },
    { "epoch": 17.07, "learning_rate": 1.534936253078606e-06, "loss": 1.0064, "step": 303 },
    { "epoch": 17.13, "learning_rate": 1.4538909090118846e-06, "loss": 0.8834, "step": 304 },
    { "epoch": 17.18, "learning_rate": 1.3749795321332887e-06, "loss": 0.93, "step": 305 },
    { "epoch": 17.24, "learning_rate": 1.2982092741250145e-06, "loss": 0.9181, "step": 306 },
    { "epoch": 17.3, "learning_rate": 1.2235870926211619e-06, "loss": 0.8043, "step": 307 },
    { "epoch": 17.35, "learning_rate": 1.1511197505771843e-06, "loss": 0.8552, "step": 308 },
    { "epoch": 17.41, "learning_rate": 1.0808138156569614e-06, "loss": 0.8864, "step": 309 },
    { "epoch": 17.46, "learning_rate": 1.0126756596375686e-06, "loss": 0.8856, "step": 310 },
    { "epoch": 17.52, "learning_rate": 9.467114578318226e-07, "loss": 0.9447, "step": 311 },
    { "epoch": 17.58, "learning_rate": 8.829271885286094e-07, "loss": 0.8091, "step": 312 },
    { "epoch": 17.63, "learning_rate": 8.213286324510738e-07, "loss": 0.9956, "step": 313 },
    { "epoch": 17.69, "learning_rate": 7.619213722327185e-07, "loss": 0.88, "step": 314 },
    { "epoch": 17.75, "learning_rate": 7.047107919114588e-07, "loss": 0.9142, "step": 315 },
    { "epoch": 17.8, "learning_rate": 6.497020764416633e-07, "loss": 0.9984, "step": 316 },
    { "epoch": 17.86, "learning_rate": 5.969002112242494e-07, "loss": 0.8869, "step": 317 },
    { "epoch": 17.92, "learning_rate": 5.463099816548579e-07, "loss": 0.9627, "step": 318 },
    { "epoch": 17.97, "learning_rate": 4.979359726901639e-07, "loss": 0.9783, "step": 319 },
    { "epoch": 18.03, "learning_rate": 4.517825684323324e-07, "loss": 1.0067, "step": 320 },
    { "epoch": 18.08, "learning_rate": 4.078539517316987e-07, "loss": 0.9261, "step": 321 },
    { "epoch": 18.14, "learning_rate": 3.6615410380767544e-07, "loss": 1.0504, "step": 322 },
    { "epoch": 18.2, "learning_rate": 3.266868038879434e-07, "loss": 0.8995, "step": 323 },
    { "epoch": 18.25, "learning_rate": 2.894556288659395e-07, "loss": 0.915, "step": 324 },
    { "epoch": 18.31, "learning_rate": 2.544639529766829e-07, "loss": 0.8657, "step": 325 },
    { "epoch": 18.37, "learning_rate": 2.2171494749097245e-07, "loss": 0.9171, "step": 326 },
    { "epoch": 18.42, "learning_rate": 1.912115804279746e-07, "loss": 0.7903, "step": 327 },
    { "epoch": 18.48, "learning_rate": 1.6295661628624447e-07, "loss": 0.9827, "step": 328 },
    { "epoch": 18.54, "learning_rate": 1.3695261579316777e-07, "loss": 0.9373, "step": 329 },
    { "epoch": 18.59, "learning_rate": 1.1320193567288529e-07, "loss": 0.915, "step": 330 },
    { "epoch": 18.65, "learning_rate": 9.170672843271666e-08, "loss": 1.0085, "step": 331 },
    { "epoch": 18.7, "learning_rate": 7.246894216806355e-08, "loss": 0.9681, "step": 332 },
    { "epoch": 18.76, "learning_rate": 5.54903203858731e-08, "loss": 0.8392, "step": 333 },
    { "epoch": 18.82, "learning_rate": 4.07724018466088e-08, "loss": 0.9473, "step": 334 },
    { "epoch": 18.87, "learning_rate": 2.831652042480093e-08, "loss": 0.9534, "step": 335 },
    { "epoch": 18.93, "learning_rate": 1.812380498815991e-08, "loss": 0.9152, "step": 336 },
    { "epoch": 18.99, "learning_rate": 1.0195179295269252e-08, "loss": 0.8761, "step": 337 },
    { "epoch": 19.04, "learning_rate": 4.531361911855325e-09, "loss": 0.9116, "step": 338 },
    { "epoch": 19.1, "learning_rate": 1.132866145678313e-09, "loss": 0.9118, "step": 339 },
    { "epoch": 19.15, "learning_rate": 0.0, "loss": 0.8806, "step": 340 }
  ],
  "logging_steps": 1,
  "max_steps": 340,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 4.253890839379968e+17,
  "trial_name": null,
  "trial_params": null
}