|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 206, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.8571428571428573e-06, |
|
"loss": 0.996, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.7142857142857145e-06, |
|
"loss": 1.0034, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.571428571428571e-06, |
|
"loss": 0.9937, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.1428571428571429e-05, |
|
"loss": 0.994, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.4285714285714287e-05, |
|
"loss": 0.9761, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.7142857142857142e-05, |
|
"loss": 0.9838, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 2e-05, |
|
"loss": 0.9728, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.9998753895176576e-05, |
|
"loss": 0.9452, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.999501589126174e-05, |
|
"loss": 0.95, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9988786919844437e-05, |
|
"loss": 0.953, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 1.9980068533314937e-05, |
|
"loss": 0.9442, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.9968862904477936e-05, |
|
"loss": 0.8945, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.995517282601106e-05, |
|
"loss": 0.9333, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.993900170976888e-05, |
|
"loss": 0.9008, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.992035358593258e-05, |
|
"loss": 0.8652, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9899233102005573e-05, |
|
"loss": 0.8659, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.987564552165524e-05, |
|
"loss": 0.8572, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.984959672340111e-05, |
|
"loss": 0.8595, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1.9821093199149806e-05, |
|
"loss": 0.8946, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9790142052577148e-05, |
|
"loss": 0.8551, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9756750997357738e-05, |
|
"loss": 0.8567, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.972092835524257e-05, |
|
"loss": 0.8264, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 1.9682683053985073e-05, |
|
"loss": 0.8544, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.9642024625116117e-05, |
|
"loss": 0.807, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 1.959896320156857e-05, |
|
"loss": 0.816, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.955350951515195e-05, |
|
"loss": 0.8022, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.950567489387783e-05, |
|
"loss": 0.7919, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.945547125913667e-05, |
|
"loss": 0.7854, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 1.9402911122726756e-05, |
|
"loss": 0.7748, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9348007583735985e-05, |
|
"loss": 0.7631, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9290774325277305e-05, |
|
"loss": 0.7694, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.923122561107861e-05, |
|
"loss": 0.7693, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 1.916937628192789e-05, |
|
"loss": 0.7977, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.9105241751974624e-05, |
|
"loss": 0.7403, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.903883800488824e-05, |
|
"loss": 0.7152, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 1.8970181589874637e-05, |
|
"loss": 0.7462, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.8899289617551803e-05, |
|
"loss": 0.7399, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 1.882617975568547e-05, |
|
"loss": 0.6994, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.875087022478594e-05, |
|
"loss": 0.6904, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 1.867337979356715e-05, |
|
"loss": 0.7154, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.8593727774269122e-05, |
|
"loss": 0.683, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 1.851193401784495e-05, |
|
"loss": 0.6527, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.842801890901351e-05, |
|
"loss": 0.7025, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1.834200336117918e-05, |
|
"loss": 0.6714, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8253908811219764e-05, |
|
"loss": 0.7061, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 1.8163757214143993e-05, |
|
"loss": 0.6749, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.8071571037619856e-05, |
|
"loss": 0.6753, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.7977373256375194e-05, |
|
"loss": 0.6536, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.7881187346471924e-05, |
|
"loss": 0.6698, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 1.77830372794553e-05, |
|
"loss": 0.6557, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7682947516379706e-05, |
|
"loss": 0.6635, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 1.7580943001712457e-05, |
|
"loss": 0.6786, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.7477049157117093e-05, |
|
"loss": 0.634, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 1.737129187511779e-05, |
|
"loss": 0.6512, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7263697512646397e-05, |
|
"loss": 0.6311, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 1.7154292884473712e-05, |
|
"loss": 0.657, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.7043105256526723e-05, |
|
"loss": 0.6448, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 1.693016233909332e-05, |
|
"loss": 0.6516, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.681549227991634e-05, |
|
"loss": 0.6134, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 1.6699123657178553e-05, |
|
"loss": 0.6768, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.658108547238038e-05, |
|
"loss": 0.6706, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 1.64614071431121e-05, |
|
"loss": 0.664, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.634011849572239e-05, |
|
"loss": 0.6412, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.6217249757884954e-05, |
|
"loss": 0.6058, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.609283155106517e-05, |
|
"loss": 0.6355, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 1.596689488288856e-05, |
|
"loss": 0.6553, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.5839471139413065e-05, |
|
"loss": 0.6181, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.571059207730695e-05, |
|
"loss": 0.6601, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 1.55802898159344e-05, |
|
"loss": 0.6285, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.5448596829350706e-05, |
|
"loss": 0.6398, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 1.5315545938209016e-05, |
|
"loss": 0.6388, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.5181170301580776e-05, |
|
"loss": 0.6174, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 1.5045503408691776e-05, |
|
"loss": 0.6202, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4908579070575936e-05, |
|
"loss": 0.6329, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 1.4770431411648898e-05, |
|
"loss": 0.6149, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.4631094861203478e-05, |
|
"loss": 0.6222, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 1.4490604144829204e-05, |
|
"loss": 0.5883, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.4348994275757933e-05, |
|
"loss": 0.6191, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 1.4206300546137844e-05, |
|
"loss": 0.6168, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.4062558518237893e-05, |
|
"loss": 0.6291, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.3917804015584932e-05, |
|
"loss": 0.6263, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.3772073114035762e-05, |
|
"loss": 0.6145, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 1.3625402132786247e-05, |
|
"loss": 0.6068, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.3477827625319826e-05, |
|
"loss": 0.6052, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 1.3329386370297615e-05, |
|
"loss": 0.62, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.3180115362392383e-05, |
|
"loss": 0.6165, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 1.3030051803068729e-05, |
|
"loss": 0.6146, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.2879233091311667e-05, |
|
"loss": 0.6137, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 1.2727696814306034e-05, |
|
"loss": 0.5891, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.2575480738068971e-05, |
|
"loss": 0.6241, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 1.2422622798037833e-05, |
|
"loss": 0.57, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2269161089615902e-05, |
|
"loss": 0.6129, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 1.2115133858678192e-05, |
|
"loss": 0.6138, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.1960579492039783e-05, |
|
"loss": 0.585, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.1805536507889021e-05, |
|
"loss": 0.5916, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1650043546187994e-05, |
|
"loss": 0.5805, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 1.1494139359042612e-05, |
|
"loss": 0.593, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1337862801044792e-05, |
|
"loss": 0.6136, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 1.1181252819589081e-05, |
|
"loss": 0.5979, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.1024348445166133e-05, |
|
"loss": 0.5984, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 1.086718878163551e-05, |
|
"loss": 0.5988, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.070981299648016e-05, |
|
"loss": 0.5895, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0552260311045082e-05, |
|
"loss": 0.5827, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 1.0394569990762528e-05, |
|
"loss": 0.5592, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0236781335366239e-05, |
|
"loss": 0.5977, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 1.0078933669097135e-05, |
|
"loss": 0.6285, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.92106633090287e-06, |
|
"loss": 0.5707, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 9.763218664633763e-06, |
|
"loss": 0.5734, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.605430009237474e-06, |
|
"loss": 0.5634, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 9.44773968895492e-06, |
|
"loss": 0.5805, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.290187003519841e-06, |
|
"loss": 0.6077, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.132811218364494e-06, |
|
"loss": 0.5553, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.975651554833869e-06, |
|
"loss": 0.5861, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 8.81874718041092e-06, |
|
"loss": 0.5703, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.662137198955211e-06, |
|
"loss": 0.6203, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 8.50586064095739e-06, |
|
"loss": 0.5881, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.349956453812009e-06, |
|
"loss": 0.5663, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 8.194463492110982e-06, |
|
"loss": 0.5783, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 8.03942050796022e-06, |
|
"loss": 0.5991, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 7.884866141321811e-06, |
|
"loss": 0.5875, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.730838910384098e-06, |
|
"loss": 0.5777, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 7.57737720196217e-06, |
|
"loss": 0.5558, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.424519261931036e-06, |
|
"loss": 0.5659, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 7.27230318569397e-06, |
|
"loss": 0.5724, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 7.1207669086883366e-06, |
|
"loss": 0.5906, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 6.969948196931272e-06, |
|
"loss": 0.6066, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.819884637607619e-06, |
|
"loss": 0.5358, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 6.670613629702391e-06, |
|
"loss": 0.5714, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.522172374680177e-06, |
|
"loss": 0.5963, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 6.374597867213756e-06, |
|
"loss": 0.5721, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.2279268859642396e-06, |
|
"loss": 0.5789, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6.082195984415069e-06, |
|
"loss": 0.5486, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.937441481762112e-06, |
|
"loss": 0.5891, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 5.793699453862161e-06, |
|
"loss": 0.5825, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5.651005724242072e-06, |
|
"loss": 0.5871, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 5.509395855170798e-06, |
|
"loss": 0.5727, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.368905138796523e-06, |
|
"loss": 0.5596, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.2295685883511086e-06, |
|
"loss": 0.564, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 5.091420929424065e-06, |
|
"loss": 0.5774, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.954496591308227e-06, |
|
"loss": 0.5657, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 4.818829698419225e-06, |
|
"loss": 0.5987, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.684454061790987e-06, |
|
"loss": 0.581, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.551403170649299e-06, |
|
"loss": 0.5937, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.4197101840656e-06, |
|
"loss": 0.5584, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 4.289407922693053e-06, |
|
"loss": 0.5957, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.1605288605869365e-06, |
|
"loss": 0.576, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 4.033105117111441e-06, |
|
"loss": 0.5939, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.907168448934836e-06, |
|
"loss": 0.5769, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.7827502421150497e-06, |
|
"loss": 0.5406, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.6598815042776135e-06, |
|
"loss": 0.5554, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.5385928568879012e-06, |
|
"loss": 0.5752, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.4189145276196244e-06, |
|
"loss": 0.5839, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 3.300876342821451e-06, |
|
"loss": 0.5926, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.1845077200836638e-06, |
|
"loss": 0.5547, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.0698376609066828e-06, |
|
"loss": 0.584, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.9568947434732777e-06, |
|
"loss": 0.5641, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 2.8457071155262885e-06, |
|
"loss": 0.5679, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.7363024873536093e-06, |
|
"loss": 0.5801, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 2.628708124882212e-06, |
|
"loss": 0.5783, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.52295084288291e-06, |
|
"loss": 0.5805, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 2.419056998287547e-06, |
|
"loss": 0.5629, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.3170524836202936e-06, |
|
"loss": 0.5662, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 2.216962720544703e-06, |
|
"loss": 0.5781, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.118812653528077e-06, |
|
"loss": 0.5541, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.022626743624807e-06, |
|
"loss": 0.5714, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.928428962380148e-06, |
|
"loss": 0.5708, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 1.8362427858560094e-06, |
|
"loss": 0.5806, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.74609118878024e-06, |
|
"loss": 0.5739, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 1.6579966388208257e-06, |
|
"loss": 0.5445, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.5719810909864941e-06, |
|
"loss": 0.5699, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.4880659821550547e-06, |
|
"loss": 0.5833, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 1.4062722257308803e-06, |
|
"loss": 0.5601, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.3266202064328548e-06, |
|
"loss": 0.5845, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 1.249129775214064e-06, |
|
"loss": 0.5776, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.1738202443145307e-06, |
|
"loss": 0.5798, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.100710382448198e-06, |
|
"loss": 0.5707, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 1.029818410125365e-06, |
|
"loss": 0.5948, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 9.611619951117657e-07, |
|
"loss": 0.5705, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.94758248025378e-07, |
|
"loss": 0.5589, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 8.306237180721121e-07, |
|
"loss": 0.5739, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.687743889213939e-07, |
|
"loss": 0.591, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 7.092256747226944e-07, |
|
"loss": 0.5556, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 6.519924162640168e-07, |
|
"loss": 0.5899, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 5.970888772732453e-07, |
|
"loss": 0.5464, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 5.445287408633304e-07, |
|
"loss": 0.5811, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 4.943251061221721e-07, |
|
"loss": 0.5808, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.464904848480522e-07, |
|
"loss": 0.5603, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.0103679843142895e-07, |
|
"loss": 0.5533, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.5797537488388326e-07, |
|
"loss": 0.5364, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 3.1731694601492834e-07, |
|
"loss": 0.5921, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.790716447574304e-07, |
|
"loss": 0.5815, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.4324900264226405e-07, |
|
"loss": 0.5596, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.098579474228546e-07, |
|
"loss": 0.5692, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 1.7890680085019597e-07, |
|
"loss": 0.551, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.504032765988961e-07, |
|
"loss": 0.5526, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 1.2435447834476254e-07, |
|
"loss": 0.5647, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 1.0076689799442874e-07, |
|
"loss": 0.5546, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 7.964641406742135e-08, |
|
"loss": 0.5609, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 6.099829023112236e-08, |
|
"loss": 0.5782, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 4.482717398894165e-08, |
|
"loss": 0.5829, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 3.1137095522068006e-08, |
|
"loss": 0.5691, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 1.993146668506585e-08, |
|
"loss": 0.5723, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 1.1213080155564327e-08, |
|
"loss": 0.557, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.984108738261828e-09, |
|
"loss": 0.58, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.246104823426908e-09, |
|
"loss": 0.5601, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.5731, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 206, |
|
"total_flos": 5.847863731316326e+16, |
|
"train_loss": 0.6453299609202783, |
|
"train_runtime": 605.87, |
|
"train_samples_per_second": 10.844, |
|
"train_steps_per_second": 0.34 |
|
} |
|
], |
|
"max_steps": 206, |
|
"num_train_epochs": 1, |
|
"total_flos": 5.847863731316326e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|