|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.45714285714285713,
  "eval_steps": 500,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.0, "grad_norm": 2.563270330429077, "learning_rate": 4e-05, "loss": 2.4127, "step": 1 },
    { "epoch": 0.0, "grad_norm": 2.333299160003662, "learning_rate": 8e-05, "loss": 2.3102, "step": 2 },
    { "epoch": 0.0, "grad_norm": 1.8613317012786865, "learning_rate": 0.00012, "loss": 2.1983, "step": 3 },
    { "epoch": 0.0, "grad_norm": 1.620126485824585, "learning_rate": 0.00016, "loss": 2.0917, "step": 4 },
    { "epoch": 0.0, "grad_norm": 1.0352119207382202, "learning_rate": 0.0002, "loss": 1.9613, "step": 5 },
    { "epoch": 0.0, "grad_norm": 0.8601917624473572, "learning_rate": 0.00019979899497487438, "loss": 1.6927, "step": 6 },
    { "epoch": 0.0, "grad_norm": 1.273868203163147, "learning_rate": 0.00019959798994974876, "loss": 1.6828, "step": 7 },
    { "epoch": 0.0, "grad_norm": 1.0873847007751465, "learning_rate": 0.00019939698492462313, "loss": 1.5088, "step": 8 },
    { "epoch": 0.0, "grad_norm": 0.5800795555114746, "learning_rate": 0.0001991959798994975, "loss": 1.3702, "step": 9 },
    { "epoch": 0.0, "grad_norm": 0.693160355091095, "learning_rate": 0.00019899497487437187, "loss": 1.3718, "step": 10 },
    { "epoch": 0.01, "grad_norm": 0.49409618973731995, "learning_rate": 0.00019879396984924622, "loss": 1.3583, "step": 11 },
    { "epoch": 0.01, "grad_norm": 0.47029319405555725, "learning_rate": 0.00019859296482412062, "loss": 1.2791, "step": 12 },
    { "epoch": 0.01, "grad_norm": 0.43618088960647583, "learning_rate": 0.000198391959798995, "loss": 1.3161, "step": 13 },
    { "epoch": 0.01, "grad_norm": 0.3907912075519562, "learning_rate": 0.00019819095477386937, "loss": 1.2954, "step": 14 },
    { "epoch": 0.01, "grad_norm": 0.6292415857315063, "learning_rate": 0.0001979899497487437, "loss": 1.3397, "step": 15 },
    { "epoch": 0.01, "grad_norm": 0.37423521280288696, "learning_rate": 0.0001977889447236181, "loss": 1.3983, "step": 16 },
    { "epoch": 0.01, "grad_norm": 0.3845643699169159, "learning_rate": 0.00019758793969849249, "loss": 1.3349, "step": 17 },
    { "epoch": 0.01, "grad_norm": 0.3657298982143402, "learning_rate": 0.00019738693467336683, "loss": 1.2767, "step": 18 },
    { "epoch": 0.01, "grad_norm": 0.3727971315383911, "learning_rate": 0.0001971859296482412, "loss": 1.3672, "step": 19 },
    { "epoch": 0.01, "grad_norm": 0.35123032331466675, "learning_rate": 0.0001969849246231156, "loss": 1.3692, "step": 20 },
    { "epoch": 0.01, "grad_norm": 0.4003850221633911, "learning_rate": 0.00019678391959798995, "loss": 1.3412, "step": 21 },
    { "epoch": 0.01, "grad_norm": 0.3638221323490143, "learning_rate": 0.00019658291457286432, "loss": 1.2813, "step": 22 },
    { "epoch": 0.01, "grad_norm": 0.391216903924942, "learning_rate": 0.0001963819095477387, "loss": 1.2853, "step": 23 },
    { "epoch": 0.01, "grad_norm": 0.4370620846748352, "learning_rate": 0.0001961809045226131, "loss": 1.2524, "step": 24 },
    { "epoch": 0.01, "grad_norm": 0.3566085696220398, "learning_rate": 0.00019597989949748744, "loss": 1.3192, "step": 25 },
    { "epoch": 0.01, "grad_norm": 0.35438084602355957, "learning_rate": 0.00019577889447236181, "loss": 1.2858, "step": 26 },
    { "epoch": 0.01, "grad_norm": 0.3968108296394348, "learning_rate": 0.0001955778894472362, "loss": 1.3112, "step": 27 },
    { "epoch": 0.01, "grad_norm": 0.36512017250061035, "learning_rate": 0.00019537688442211056, "loss": 1.278, "step": 28 },
    { "epoch": 0.01, "grad_norm": 0.3982504606246948, "learning_rate": 0.00019517587939698493, "loss": 1.2392, "step": 29 },
    { "epoch": 0.01, "grad_norm": 0.38377949595451355, "learning_rate": 0.0001949748743718593, "loss": 1.2843, "step": 30 },
    { "epoch": 0.01, "grad_norm": 0.3582867980003357, "learning_rate": 0.00019477386934673368, "loss": 1.3008, "step": 31 },
    { "epoch": 0.01, "grad_norm": 0.3572194576263428, "learning_rate": 0.00019457286432160805, "loss": 1.294, "step": 32 },
    { "epoch": 0.02, "grad_norm": 0.35502907633781433, "learning_rate": 0.00019437185929648243, "loss": 1.3877, "step": 33 },
    { "epoch": 0.02, "grad_norm": 0.3649040460586548, "learning_rate": 0.0001941708542713568, "loss": 1.2966, "step": 34 },
    { "epoch": 0.02, "grad_norm": 0.3649256229400635, "learning_rate": 0.00019396984924623117, "loss": 1.2354, "step": 35 },
    { "epoch": 0.02, "grad_norm": 0.36085084080696106, "learning_rate": 0.00019376884422110552, "loss": 1.2409, "step": 36 },
    { "epoch": 0.02, "grad_norm": 0.35929059982299805, "learning_rate": 0.00019356783919597992, "loss": 1.243, "step": 37 },
    { "epoch": 0.02, "grad_norm": 0.3897881805896759, "learning_rate": 0.0001933668341708543, "loss": 1.3945, "step": 38 },
    { "epoch": 0.02, "grad_norm": 0.35484543442726135, "learning_rate": 0.00019316582914572864, "loss": 1.3433, "step": 39 },
    { "epoch": 0.02, "grad_norm": 0.35691192746162415, "learning_rate": 0.000192964824120603, "loss": 1.3243, "step": 40 },
    { "epoch": 0.02, "grad_norm": 0.3804129958152771, "learning_rate": 0.0001927638190954774, "loss": 1.2509, "step": 41 },
    { "epoch": 0.02, "grad_norm": 0.3623339831829071, "learning_rate": 0.00019256281407035178, "loss": 1.1799, "step": 42 },
    { "epoch": 0.02, "grad_norm": 0.3411855697631836, "learning_rate": 0.00019236180904522613, "loss": 1.2372, "step": 43 },
    { "epoch": 0.02, "grad_norm": 0.36590930819511414, "learning_rate": 0.0001921608040201005, "loss": 1.2585, "step": 44 },
    { "epoch": 0.02, "grad_norm": 0.30974116921424866, "learning_rate": 0.0001919597989949749, "loss": 1.2974, "step": 45 },
    { "epoch": 0.02, "grad_norm": 0.32794803380966187, "learning_rate": 0.00019175879396984925, "loss": 1.2696, "step": 46 },
    { "epoch": 0.02, "grad_norm": 0.33263906836509705, "learning_rate": 0.00019155778894472362, "loss": 1.3209, "step": 47 },
    { "epoch": 0.02, "grad_norm": 0.31748828291893005, "learning_rate": 0.000191356783919598, "loss": 1.278, "step": 48 },
    { "epoch": 0.02, "grad_norm": 0.34738561511039734, "learning_rate": 0.0001911557788944724, "loss": 1.2105, "step": 49 },
    { "epoch": 0.02, "grad_norm": 0.3313944339752197, "learning_rate": 0.00019095477386934674, "loss": 1.2527, "step": 50 },
    { "epoch": 0.02, "grad_norm": 0.33137476444244385, "learning_rate": 0.0001907537688442211, "loss": 1.2984, "step": 51 },
    { "epoch": 0.02, "grad_norm": 0.31752490997314453, "learning_rate": 0.00019055276381909548, "loss": 1.307, "step": 52 },
    { "epoch": 0.02, "grad_norm": 0.3111082911491394, "learning_rate": 0.00019035175879396986, "loss": 1.2769, "step": 53 },
    { "epoch": 0.02, "grad_norm": 0.31065696477890015, "learning_rate": 0.00019015075376884423, "loss": 1.3082, "step": 54 },
    { "epoch": 0.03, "grad_norm": 0.3382773697376251, "learning_rate": 0.0001899497487437186, "loss": 1.2744, "step": 55 },
    { "epoch": 0.03, "grad_norm": 0.34320947527885437, "learning_rate": 0.00018974874371859298, "loss": 1.3013, "step": 56 },
    { "epoch": 0.03, "grad_norm": 0.33131280541419983, "learning_rate": 0.00018954773869346732, "loss": 1.4066, "step": 57 },
    { "epoch": 0.03, "grad_norm": 0.3357389569282532, "learning_rate": 0.00018934673366834172, "loss": 1.2841, "step": 58 },
    { "epoch": 0.03, "grad_norm": 0.3200838267803192, "learning_rate": 0.0001891457286432161, "loss": 1.2654, "step": 59 },
    { "epoch": 0.03, "grad_norm": 0.3336584270000458, "learning_rate": 0.00018894472361809047, "loss": 1.1716, "step": 60 },
    { "epoch": 0.03, "grad_norm": 0.3128441274166107, "learning_rate": 0.00018874371859296481, "loss": 1.3009, "step": 61 },
    { "epoch": 0.03, "grad_norm": 0.30249112844467163, "learning_rate": 0.00018854271356783921, "loss": 1.2311, "step": 62 },
    { "epoch": 0.03, "grad_norm": 0.3263241946697235, "learning_rate": 0.0001883417085427136, "loss": 1.2344, "step": 63 },
    { "epoch": 0.03, "grad_norm": 0.32348358631134033, "learning_rate": 0.00018814070351758793, "loss": 1.3023, "step": 64 },
    { "epoch": 0.03, "grad_norm": 0.6508419513702393, "learning_rate": 0.0001879396984924623, "loss": 1.2028, "step": 65 },
    { "epoch": 0.03, "grad_norm": 0.34560996294021606, "learning_rate": 0.0001877386934673367, "loss": 1.389, "step": 66 },
    { "epoch": 0.03, "grad_norm": 0.36555996537208557, "learning_rate": 0.00018753768844221108, "loss": 1.3653, "step": 67 },
    { "epoch": 0.03, "grad_norm": 0.3195466697216034, "learning_rate": 0.00018733668341708543, "loss": 1.2412, "step": 68 },
    { "epoch": 0.03, "grad_norm": 0.30555933713912964, "learning_rate": 0.0001871356783919598, "loss": 1.2357, "step": 69 },
    { "epoch": 0.03, "grad_norm": 0.30776411294937134, "learning_rate": 0.0001869346733668342, "loss": 1.3112, "step": 70 },
    { "epoch": 0.03, "grad_norm": 0.31933915615081787, "learning_rate": 0.00018673366834170854, "loss": 1.1951, "step": 71 },
    { "epoch": 0.03, "grad_norm": 0.3241545259952545, "learning_rate": 0.00018653266331658292, "loss": 1.2717, "step": 72 },
    { "epoch": 0.03, "grad_norm": 0.3117482364177704, "learning_rate": 0.0001863316582914573, "loss": 1.3031, "step": 73 },
    { "epoch": 0.03, "grad_norm": 0.33056551218032837, "learning_rate": 0.0001861306532663317, "loss": 1.2098, "step": 74 },
    { "epoch": 0.03, "grad_norm": 0.32441195845603943, "learning_rate": 0.00018592964824120604, "loss": 1.2135, "step": 75 },
    { "epoch": 0.03, "grad_norm": 0.34216779470443726, "learning_rate": 0.0001857286432160804, "loss": 1.2531, "step": 76 },
    { "epoch": 0.04, "grad_norm": 0.32885247468948364, "learning_rate": 0.00018552763819095478, "loss": 1.3054, "step": 77 },
    { "epoch": 0.04, "grad_norm": 0.34541794657707214, "learning_rate": 0.00018532663316582915, "loss": 1.3207, "step": 78 },
    { "epoch": 0.04, "grad_norm": 0.30329057574272156, "learning_rate": 0.00018512562814070353, "loss": 1.2652, "step": 79 },
    { "epoch": 0.04, "grad_norm": 0.31469491124153137, "learning_rate": 0.0001849246231155779, "loss": 1.1961, "step": 80 },
    { "epoch": 0.04, "grad_norm": 0.3181230127811432, "learning_rate": 0.00018472361809045227, "loss": 1.3111, "step": 81 },
    { "epoch": 0.04, "grad_norm": 0.3181725740432739, "learning_rate": 0.00018452261306532662, "loss": 1.3353, "step": 82 },
    { "epoch": 0.04, "grad_norm": 0.3154084384441376, "learning_rate": 0.00018432160804020102, "loss": 1.2418, "step": 83 },
    { "epoch": 0.04, "grad_norm": 0.35061103105545044, "learning_rate": 0.0001841206030150754, "loss": 1.2332, "step": 84 },
    { "epoch": 0.04, "grad_norm": 0.3259966969490051, "learning_rate": 0.00018391959798994977, "loss": 1.3633, "step": 85 },
    { "epoch": 0.04, "grad_norm": 0.31192857027053833, "learning_rate": 0.0001837185929648241, "loss": 1.1886, "step": 86 },
    { "epoch": 0.04, "grad_norm": 0.32024237513542175, "learning_rate": 0.0001835175879396985, "loss": 1.2141, "step": 87 },
    { "epoch": 0.04, "grad_norm": 0.302498459815979, "learning_rate": 0.00018331658291457288, "loss": 1.237, "step": 88 },
    { "epoch": 0.04, "grad_norm": 0.3569789230823517, "learning_rate": 0.00018311557788944723, "loss": 1.3015, "step": 89 },
    { "epoch": 0.04, "grad_norm": 0.3121156692504883, "learning_rate": 0.0001829145728643216, "loss": 1.281, "step": 90 },
    { "epoch": 0.04, "grad_norm": 0.31279826164245605, "learning_rate": 0.000182713567839196, "loss": 1.2924, "step": 91 },
    { "epoch": 0.04, "grad_norm": 0.3210877478122711, "learning_rate": 0.00018251256281407038, "loss": 1.3082, "step": 92 },
    { "epoch": 0.04, "grad_norm": 0.331406831741333, "learning_rate": 0.00018231155778894472, "loss": 1.2434, "step": 93 },
    { "epoch": 0.04, "grad_norm": 0.3135213255882263, "learning_rate": 0.0001821105527638191, "loss": 1.2188, "step": 94 },
    { "epoch": 0.04, "grad_norm": 0.31146401166915894, "learning_rate": 0.0001819095477386935, "loss": 1.2484, "step": 95 },
    { "epoch": 0.04, "grad_norm": 0.32071712613105774, "learning_rate": 0.00018170854271356784, "loss": 1.1927, "step": 96 },
    { "epoch": 0.04, "grad_norm": 0.3343571722507477, "learning_rate": 0.00018150753768844221, "loss": 1.3443, "step": 97 },
    { "epoch": 0.04, "grad_norm": 0.3510550558567047, "learning_rate": 0.0001813065326633166, "loss": 1.2832, "step": 98 },
    { "epoch": 0.05, "grad_norm": 0.33436939120292664, "learning_rate": 0.00018110552763819096, "loss": 1.252, "step": 99 },
    { "epoch": 0.05, "grad_norm": 0.3175451159477234, "learning_rate": 0.00018090452261306533, "loss": 1.251, "step": 100 },
    { "epoch": 0.05, "grad_norm": 0.32603979110717773, "learning_rate": 0.0001807035175879397, "loss": 1.228, "step": 101 },
    { "epoch": 0.05, "grad_norm": 0.3073003590106964, "learning_rate": 0.00018050251256281408, "loss": 1.2659, "step": 102 },
    { "epoch": 0.05, "grad_norm": 0.3285619616508484, "learning_rate": 0.00018030150753768845, "loss": 1.2826, "step": 103 },
    { "epoch": 0.05, "grad_norm": 0.3038572072982788, "learning_rate": 0.00018010050251256282, "loss": 1.217, "step": 104 },
    { "epoch": 0.05, "grad_norm": 0.35778746008872986, "learning_rate": 0.0001798994974874372, "loss": 1.2901, "step": 105 },
    { "epoch": 0.05, "grad_norm": 0.2900612950325012, "learning_rate": 0.00017969849246231157, "loss": 1.2651, "step": 106 },
    { "epoch": 0.05, "grad_norm": 0.32928743958473206, "learning_rate": 0.00017949748743718592, "loss": 1.3143, "step": 107 },
    { "epoch": 0.05, "grad_norm": 0.32471874356269836, "learning_rate": 0.00017929648241206032, "loss": 1.1834, "step": 108 },
    { "epoch": 0.05, "grad_norm": 0.30989256501197815, "learning_rate": 0.0001790954773869347, "loss": 1.2216, "step": 109 },
    { "epoch": 0.05, "grad_norm": 0.3371771275997162, "learning_rate": 0.00017889447236180906, "loss": 1.197, "step": 110 },
    { "epoch": 0.05, "grad_norm": 0.31041428446769714, "learning_rate": 0.0001786934673366834, "loss": 1.27, "step": 111 },
    { "epoch": 0.05, "grad_norm": 0.3152185082435608, "learning_rate": 0.0001784924623115578, "loss": 1.2436, "step": 112 },
    { "epoch": 0.05, "grad_norm": 0.3227459490299225, "learning_rate": 0.00017829145728643218, "loss": 1.2401, "step": 113 },
    { "epoch": 0.05, "grad_norm": 0.3246959149837494, "learning_rate": 0.00017809045226130653, "loss": 1.2703, "step": 114 },
    { "epoch": 0.05, "grad_norm": 0.38032859563827515, "learning_rate": 0.0001778894472361809, "loss": 1.3266, "step": 115 },
    { "epoch": 0.05, "grad_norm": 0.33325478434562683, "learning_rate": 0.0001776884422110553, "loss": 1.2954, "step": 116 },
    { "epoch": 0.05, "grad_norm": 0.3178690969944, "learning_rate": 0.00017748743718592967, "loss": 1.1793, "step": 117 },
    { "epoch": 0.05, "grad_norm": 0.31393784284591675, "learning_rate": 0.00017728643216080402, "loss": 1.277, "step": 118 },
    { "epoch": 0.05, "grad_norm": 0.3150279223918915, "learning_rate": 0.0001770854271356784, "loss": 1.293, "step": 119 },
    { "epoch": 0.05, "grad_norm": 0.32476913928985596, "learning_rate": 0.0001768844221105528, "loss": 1.2569, "step": 120 },
    { "epoch": 0.06, "grad_norm": 0.36075925827026367, "learning_rate": 0.00017668341708542714, "loss": 1.205, "step": 121 },
    { "epoch": 0.06, "grad_norm": 0.33134496212005615, "learning_rate": 0.0001764824120603015, "loss": 1.2299, "step": 122 },
    { "epoch": 0.06, "grad_norm": 0.30507662892341614, "learning_rate": 0.00017628140703517588, "loss": 1.2883, "step": 123 },
    { "epoch": 0.06, "grad_norm": 0.34049952030181885, "learning_rate": 0.00017608040201005026, "loss": 1.214, "step": 124 },
    { "epoch": 0.06, "grad_norm": 0.3405919373035431, "learning_rate": 0.00017587939698492463, "loss": 1.2738, "step": 125 },
    { "epoch": 0.06, "grad_norm": 0.3306083679199219, "learning_rate": 0.000175678391959799, "loss": 1.2415, "step": 126 },
    { "epoch": 0.06, "grad_norm": 0.33770737051963806, "learning_rate": 0.00017547738693467338, "loss": 1.3233, "step": 127 },
    { "epoch": 0.06, "grad_norm": 0.3261878788471222, "learning_rate": 0.00017527638190954775, "loss": 1.2695, "step": 128 },
    { "epoch": 0.06, "grad_norm": 0.3433193266391754, "learning_rate": 0.00017507537688442212, "loss": 1.2052, "step": 129 },
    { "epoch": 0.06, "grad_norm": 0.3111405670642853, "learning_rate": 0.0001748743718592965, "loss": 1.2798, "step": 130 },
    { "epoch": 0.06, "grad_norm": 0.3630310297012329, "learning_rate": 0.00017467336683417087, "loss": 1.2567, "step": 131 },
    { "epoch": 0.06, "grad_norm": 0.31963038444519043, "learning_rate": 0.00017447236180904521, "loss": 1.2455, "step": 132 },
    { "epoch": 0.06, "grad_norm": 0.299695760011673, "learning_rate": 0.00017427135678391961, "loss": 1.207, "step": 133 },
    { "epoch": 0.06, "grad_norm": 0.3167514503002167, "learning_rate": 0.000174070351758794, "loss": 1.2378, "step": 134 },
    { "epoch": 0.06, "grad_norm": 0.31375688314437866, "learning_rate": 0.00017386934673366836, "loss": 1.2658, "step": 135 },
    { "epoch": 0.06, "grad_norm": 0.34311383962631226, "learning_rate": 0.0001736683417085427, "loss": 1.2004, "step": 136 },
    { "epoch": 0.06, "grad_norm": 0.31706517934799194, "learning_rate": 0.0001734673366834171, "loss": 1.1879, "step": 137 },
    { "epoch": 0.06, "grad_norm": 0.31296172738075256, "learning_rate": 0.00017326633165829148, "loss": 1.1866, "step": 138 },
    { "epoch": 0.06, "grad_norm": 0.3254072368144989, "learning_rate": 0.00017306532663316582, "loss": 1.1952, "step": 139 },
    { "epoch": 0.06, "grad_norm": 0.3165453374385834, "learning_rate": 0.0001728643216080402, "loss": 1.3459, "step": 140 },
    { "epoch": 0.06, "grad_norm": 0.35455992817878723, "learning_rate": 0.0001726633165829146, "loss": 1.2494, "step": 141 },
    { "epoch": 0.06, "grad_norm": 0.3116908073425293, "learning_rate": 0.00017246231155778897, "loss": 1.2225, "step": 142 },
    { "epoch": 0.07, "grad_norm": 0.3141638934612274, "learning_rate": 0.00017226130653266332, "loss": 1.3385, "step": 143 },
    { "epoch": 0.07, "grad_norm": 0.3096507787704468, "learning_rate": 0.0001720603015075377, "loss": 1.3257, "step": 144 },
    { "epoch": 0.07, "grad_norm": 0.3160630464553833, "learning_rate": 0.00017185929648241206, "loss": 1.2683, "step": 145 },
    { "epoch": 0.07, "grad_norm": 0.3342824876308441, "learning_rate": 0.00017165829145728644, "loss": 1.2551, "step": 146 },
    { "epoch": 0.07, "grad_norm": 0.3086145222187042, "learning_rate": 0.0001714572864321608, "loss": 1.2456, "step": 147 },
    { "epoch": 0.07, "grad_norm": 0.3001709282398224, "learning_rate": 0.00017125628140703518, "loss": 1.2287, "step": 148 },
    { "epoch": 0.07, "grad_norm": 0.3277103304862976, "learning_rate": 0.00017105527638190955, "loss": 1.3302, "step": 149 },
    { "epoch": 0.07, "grad_norm": 0.33616161346435547, "learning_rate": 0.00017085427135678393, "loss": 1.2604, "step": 150 },
    { "epoch": 0.07, "grad_norm": 0.3231915533542633, "learning_rate": 0.0001706532663316583, "loss": 1.2367, "step": 151 },
    { "epoch": 0.07, "grad_norm": 0.3305569291114807, "learning_rate": 0.00017045226130653267, "loss": 1.2387, "step": 152 },
    { "epoch": 0.07, "grad_norm": 0.35031118988990784, "learning_rate": 0.00017025125628140705, "loss": 1.2464, "step": 153 },
    { "epoch": 0.07, "grad_norm": 0.3142334222793579, "learning_rate": 0.00017005025125628142, "loss": 1.3614, "step": 154 },
    { "epoch": 0.07, "grad_norm": 0.31159430742263794, "learning_rate": 0.0001698492462311558, "loss": 1.2556, "step": 155 },
    { "epoch": 0.07, "grad_norm": 0.3273050785064697, "learning_rate": 0.00016964824120603016, "loss": 1.3519, "step": 156 },
    { "epoch": 0.07, "grad_norm": 0.3299296796321869, "learning_rate": 0.0001694472361809045, "loss": 1.1763, "step": 157 },
    { "epoch": 0.07, "grad_norm": 0.33138513565063477, "learning_rate": 0.0001692462311557789, "loss": 1.17, "step": 158 },
    { "epoch": 0.07, "grad_norm": 0.30424776673316956, "learning_rate": 0.00016904522613065328, "loss": 1.123, "step": 159 },
    { "epoch": 0.07, "grad_norm": 0.3452983498573303, "learning_rate": 0.00016884422110552766, "loss": 1.2999, "step": 160 },
    { "epoch": 0.07, "grad_norm": 0.33614206314086914, "learning_rate": 0.000168643216080402, "loss": 1.262, "step": 161 },
    { "epoch": 0.07, "grad_norm": 0.32416558265686035, "learning_rate": 0.0001684422110552764, "loss": 1.2514, "step": 162 },
    { "epoch": 0.07, "grad_norm": 0.29827457666397095, "learning_rate": 0.00016824120603015078, "loss": 1.2461, "step": 163 },
    { "epoch": 0.07, "grad_norm": 0.32572871446609497, "learning_rate": 0.00016804020100502512, "loss": 1.2393, "step": 164 },
    { "epoch": 0.08, "grad_norm": 0.32171282172203064, "learning_rate": 0.0001678391959798995, "loss": 1.3045, "step": 165 },
    { "epoch": 0.08, "grad_norm": 0.34592801332473755, "learning_rate": 0.0001676381909547739, "loss": 1.2669, "step": 166 },
    { "epoch": 0.08, "grad_norm": 0.33795440196990967, "learning_rate": 0.00016743718592964827, "loss": 1.1404, "step": 167 },
    { "epoch": 0.08, "grad_norm": 0.32598641514778137, "learning_rate": 0.0001672361809045226, "loss": 1.2495, "step": 168 },
    { "epoch": 0.08, "grad_norm": 0.31816181540489197, "learning_rate": 0.00016703517587939699, "loss": 1.3003, "step": 169 },
    { "epoch": 0.08, "grad_norm": 0.3340943157672882, "learning_rate": 0.00016683417085427136, "loss": 1.2615, "step": 170 },
    { "epoch": 0.08, "grad_norm": 0.3242477476596832, "learning_rate": 0.00016663316582914573, "loss": 1.2527, "step": 171 },
    { "epoch": 0.08, "grad_norm": 0.308652400970459, "learning_rate": 0.0001664321608040201, "loss": 1.3241, "step": 172 },
    { "epoch": 0.08, "grad_norm": 0.31818273663520813, "learning_rate": 0.00016623115577889448, "loss": 1.3712, "step": 173 },
    { "epoch": 0.08, "grad_norm": 0.32885751128196716, "learning_rate": 0.00016603015075376885, "loss": 1.2583, "step": 174 },
    { "epoch": 0.08, "grad_norm": 0.32561740279197693, "learning_rate": 0.00016582914572864322, "loss": 1.2458, "step": 175 },
    { "epoch": 0.08, "grad_norm": 0.3278496563434601, "learning_rate": 0.0001656281407035176, "loss": 1.2205, "step": 176 },
    { "epoch": 0.08, "grad_norm": 0.32530438899993896, "learning_rate": 0.00016542713567839197, "loss": 1.2235, "step": 177 },
    { "epoch": 0.08, "grad_norm": 0.31232836842536926, "learning_rate": 0.00016522613065326634, "loss": 1.199, "step": 178 },
    { "epoch": 0.08, "grad_norm": 0.3209743797779083, "learning_rate": 0.00016502512562814072, "loss": 1.2717, "step": 179 },
    { "epoch": 0.08, "grad_norm": 0.329940527677536, "learning_rate": 0.0001648241206030151, "loss": 1.2425, "step": 180 },
    { "epoch": 0.08, "grad_norm": 0.3144824802875519, "learning_rate": 0.00016462311557788946, "loss": 1.2444, "step": 181 },
    { "epoch": 0.08, "grad_norm": 0.3218553066253662, "learning_rate": 0.0001644221105527638, "loss": 1.2815, "step": 182 },
    { "epoch": 0.08, "grad_norm": 0.33460506796836853, "learning_rate": 0.0001642211055276382, "loss": 1.3774, "step": 183 },
    { "epoch": 0.08, "grad_norm": 0.3300727605819702, "learning_rate": 0.00016402010050251258, "loss": 1.3436, "step": 184 },
    { "epoch": 0.08, "grad_norm": 0.3530360460281372, "learning_rate": 0.00016381909547738695, "loss": 1.2605, "step": 185 },
    { "epoch": 0.09, "grad_norm": 0.3326485753059387, "learning_rate": 0.0001636180904522613, "loss": 1.2202, "step": 186 },
    { "epoch": 0.09, "grad_norm": 0.31355732679367065, "learning_rate": 0.0001634170854271357, "loss": 1.2798, "step": 187 },
    { "epoch": 0.09, "grad_norm": 0.3162304759025574, "learning_rate": 0.00016321608040201007, "loss": 1.2118, "step": 188 },
    { "epoch": 0.09, "grad_norm": 0.32264095544815063, "learning_rate": 0.00016301507537688442, "loss": 1.2775, "step": 189 },
    { "epoch": 0.09, "grad_norm": 0.30425918102264404, "learning_rate": 0.0001628140703517588, "loss": 1.1438, "step": 190 },
    { "epoch": 0.09, "grad_norm": 0.33907556533813477, "learning_rate": 0.00016261306532663316, "loss": 1.4077, "step": 191 },
    { "epoch": 0.09, "grad_norm": 0.32334232330322266, "learning_rate": 0.00016241206030150756, "loss": 1.2673, "step": 192 },
    { "epoch": 0.09, "grad_norm": 0.32999834418296814, "learning_rate": 0.0001622110552763819, "loss": 1.257, "step": 193 },
    { "epoch": 0.09, "grad_norm": 0.3223746120929718, "learning_rate": 0.00016201005025125628, "loss": 1.2125, "step": 194 },
    { "epoch": 0.09, "grad_norm": 0.3236989378929138, "learning_rate": 0.00016180904522613066, "loss": 1.3645, "step": 195 },
    { "epoch": 0.09, "grad_norm": 0.3303336203098297, "learning_rate": 0.00016160804020100503, "loss": 1.2786, "step": 196 },
    { "epoch": 0.09, "grad_norm": 0.3135005831718445, "learning_rate": 0.0001614070351758794, "loss": 1.2775, "step": 197 },
    { "epoch": 0.09, "grad_norm": 0.3185466527938843, "learning_rate": 0.00016120603015075378, "loss": 1.2128, "step": 198 },
    { "epoch": 0.09, "grad_norm": 0.3355714976787567, "learning_rate": 0.00016100502512562815, "loss": 1.307, "step": 199 },
    { "epoch": 0.09, "grad_norm": 0.339216023683548, "learning_rate": 0.00016080402010050252, "loss": 1.2845, "step": 200 },
    { "epoch": 0.09, "grad_norm": 0.335781455039978, "learning_rate": 0.0001606030150753769, "loss": 1.3783, "step": 201 },
    { "epoch": 0.09, "grad_norm": 0.3324490189552307, "learning_rate": 0.00016040201005025127, "loss": 1.3027, "step": 202 },
    { "epoch": 0.09, "grad_norm": 0.3381625711917877, "learning_rate": 0.00016020100502512564, "loss": 1.1912, "step": 203 },
    { "epoch": 0.09, "grad_norm": 0.3119088113307953, "learning_rate": 0.00016, "loss": 1.2789, "step": 204 },
    { "epoch": 0.09, "grad_norm": 0.32320040464401245, "learning_rate": 0.00015979899497487439, "loss": 1.2454, "step": 205 },
    { "epoch": 0.09, "grad_norm": 0.32925647497177124, "learning_rate": 0.00015959798994974876, "loss": 1.2602, "step": 206 },
    { "epoch": 0.09, "grad_norm": 0.32569676637649536, "learning_rate": 0.0001593969849246231, "loss": 1.2034, "step": 207 },
    { "epoch": 0.1, "grad_norm": 0.3359280824661255, "learning_rate": 0.0001591959798994975, "loss": 1.2624, "step": 208 },
    { "epoch": 0.1, "grad_norm": 0.3207138180732727, "learning_rate": 0.00015899497487437188, "loss": 1.2814, "step": 209 },
    { "epoch": 0.1, "grad_norm": 0.32691851258277893, "learning_rate": 0.00015879396984924625, "loss": 1.2213, "step": 210 },
    { "epoch": 0.1, "grad_norm": 0.33548569679260254, "learning_rate": 0.0001585929648241206, "loss": 1.133, "step": 211 },
    { "epoch": 0.1, "grad_norm": 0.3024287819862366, "learning_rate": 0.000158391959798995, "loss": 1.1925, "step": 212 },
    { "epoch": 0.1, "grad_norm": 0.3605235517024994, "learning_rate": 0.00015819095477386937, "loss": 1.3439, "step": 213 },
    { "epoch": 0.1, "grad_norm": 0.33820798993110657, "learning_rate": 0.00015798994974874372, "loss": 1.1816, "step": 214 },
    { "epoch": 0.1, "grad_norm": 0.32633164525032043, "learning_rate": 0.0001577889447236181, "loss": 1.2521, "step": 215 },
    { "epoch": 0.1, "grad_norm": 0.34430956840515137, "learning_rate": 0.00015758793969849246, "loss": 1.3119, "step": 216 },
    { "epoch": 0.1, "grad_norm": 0.32302767038345337, "learning_rate": 0.00015738693467336686, "loss": 1.2437, "step": 217 },
    { "epoch": 0.1, "grad_norm": 0.3343082070350647, "learning_rate": 0.0001571859296482412, "loss": 1.298, "step": 218 },
    { "epoch": 0.1, "grad_norm": 0.34785017371177673, "learning_rate": 0.00015698492462311558, "loss": 1.2371, "step": 219 },
    { "epoch": 0.1, "grad_norm": 0.3445768356323242, "learning_rate": 0.00015678391959798995, "loss": 1.2617, "step": 220 },
    { "epoch": 0.1, "grad_norm": 0.3664666414260864, "learning_rate": 0.00015658291457286433, "loss": 1.1819, "step": 221 },
    { "epoch": 0.1, "grad_norm": 0.3051821291446686, "learning_rate": 0.0001563819095477387, "loss": 1.3238, "step": 222 },
    { "epoch": 0.1, "grad_norm": 0.31191888451576233, "learning_rate": 0.00015618090452261307, "loss": 1.2955, "step": 223 },
    { "epoch": 0.1, "grad_norm": 0.33101990818977356, "learning_rate": 0.00015597989949748745, "loss": 1.2489, "step": 224 },
    { "epoch": 0.1, "grad_norm": 0.3157234489917755, "learning_rate": 0.00015577889447236182, "loss": 1.3097, "step": 225 },
    { "epoch": 0.1, "grad_norm": 0.29264160990715027, "learning_rate": 0.0001555778894472362, "loss": 1.2254, "step": 226 },
    { "epoch": 0.1, "grad_norm": 0.34887459874153137, "learning_rate": 0.00015537688442211056, "loss": 1.2773, "step": 227 },
    { "epoch": 0.1, "grad_norm": 0.3264656960964203, "learning_rate": 0.00015517587939698494, "loss": 1.2045, "step": 228 },
    { "epoch": 0.1, "grad_norm": 0.3183201849460602, "learning_rate": 0.0001549748743718593, "loss": 1.3232, "step": 229 },
    { "epoch": 0.11, "grad_norm": 0.3423653542995453, "learning_rate": 0.00015477386934673368, "loss": 1.2111, "step": 230 },
    { "epoch": 0.11, "grad_norm": 0.32361966371536255, "learning_rate": 0.00015457286432160806, "loss": 1.2766, "step": 231 },
    { "epoch": 0.11, "grad_norm": 0.35348379611968994, "learning_rate": 0.0001543718592964824, "loss": 1.2533, "step": 232 },
    { "epoch": 0.11, "grad_norm": 0.348850280046463, "learning_rate": 0.0001541708542713568, "loss": 1.2907, "step": 233 },
    { "epoch": 0.11, "grad_norm": 0.31669101119041443, "learning_rate": 0.00015396984924623117, "loss": 1.1785, "step": 234 },
    { "epoch": 0.11, "grad_norm": 0.3331408202648163, "learning_rate": 0.00015376884422110555, "loss": 1.2429, "step": 235 },
    { "epoch": 0.11, "grad_norm": 0.3349299728870392, "learning_rate": 0.0001535678391959799, "loss": 1.1955, "step": 236 },
    { "epoch": 0.11, "grad_norm": 0.3368314802646637, "learning_rate": 0.00015336683417085427, "loss": 1.3476, "step": 237 },
    { "epoch": 0.11, "grad_norm": 0.3111830949783325, "learning_rate": 0.00015316582914572867, "loss": 1.2382, "step": 238 },
    { "epoch": 0.11, "grad_norm": 0.32492902874946594, "learning_rate": 0.000152964824120603, "loss": 1.2453, "step": 239 },
    { "epoch": 0.11, "grad_norm": 0.3176097869873047, "learning_rate": 0.00015276381909547739, "loss": 1.2639, "step": 240 },
    { "epoch": 0.11, "grad_norm": 0.3306467831134796, "learning_rate": 0.00015256281407035176, "loss": 1.2979, "step": 241 },
    { "epoch": 0.11, "grad_norm": 0.33482393622398376, "learning_rate": 0.00015236180904522613, "loss": 1.2855, "step": 242 },
    { "epoch": 0.11, "grad_norm": 0.317231684923172, "learning_rate": 0.0001521608040201005, "loss": 1.336, "step": 243 },
    { "epoch": 0.11, "grad_norm": 0.3562380075454712, "learning_rate": 0.00015195979899497488, "loss": 1.2538, "step": 244 },
    { "epoch": 0.11, "grad_norm": 0.3557191491127014, "learning_rate": 0.00015175879396984925, "loss": 1.1941, "step": 245 },
    { "epoch": 0.11, "grad_norm": 0.33679717779159546, "learning_rate": 0.00015155778894472362, "loss": 1.2929, "step": 246 },
    { "epoch": 0.11, "grad_norm": 0.32738903164863586, "learning_rate": 0.000151356783919598, "loss": 1.1497, "step": 247 },
    { "epoch": 0.11, "grad_norm": 0.3145580589771271, "learning_rate": 0.00015115577889447237, "loss": 1.2982, "step": 248 },
    { "epoch": 0.11, "grad_norm": 0.3444727659225464, "learning_rate": 0.00015095477386934674, "loss": 1.3321, "step": 249 },
    { "epoch": 0.11, "grad_norm": 0.3220258355140686, "learning_rate": 0.00015075376884422112, "loss": 1.1777, "step": 250 },
    { "epoch": 0.11, "grad_norm": 0.32968461513519287, "learning_rate": 0.0001505527638190955, "loss": 1.2707, "step": 251 },
    { "epoch": 0.12, "grad_norm": 0.3543086647987366, "learning_rate": 0.00015035175879396986, "loss": 1.2997, "step": 252 },
    { "epoch": 0.12, "grad_norm": 0.32566267251968384, "learning_rate": 0.00015015075376884423, "loss": 1.2313, "step": 253 },
    { "epoch": 0.12, "grad_norm": 0.31076309084892273, "learning_rate": 0.0001499497487437186, "loss": 1.3003, "step": 254 },
    { "epoch": 0.12, "grad_norm": 0.33521464467048645, "learning_rate": 0.00014974874371859298, "loss": 1.2593, "step": 255 },
    { "epoch": 0.12, "grad_norm": 0.3666183054447174, "learning_rate": 0.00014954773869346735, "loss": 1.3473, "step": 256 },
    { "epoch": 0.12, "grad_norm": 0.36615288257598877, "learning_rate": 0.0001493467336683417, "loss": 1.2521, "step": 257 },
    { "epoch": 0.12, "grad_norm": 0.3385326564311981, "learning_rate": 0.0001491457286432161, "loss": 1.3117, "step": 258 },
    { "epoch": 0.12, "grad_norm": 0.32243263721466064, "learning_rate": 0.00014894472361809047, "loss": 1.1798, "step": 259 },
    { "epoch": 0.12, "grad_norm": 0.3227294087409973, "learning_rate": 0.00014874371859296482, "loss": 1.2362, "step": 260 },
    { "epoch": 0.12, "grad_norm": 0.3345654606819153, "learning_rate": 0.0001485427135678392, "loss": 1.2136, "step": 261 },
    { "epoch": 0.12, "grad_norm": 0.31397944688796997, "learning_rate": 0.00014834170854271356, "loss": 1.1987, "step": 262 },
    { "epoch": 0.12, "grad_norm": 0.3394251763820648, "learning_rate": 0.00014814070351758796, "loss": 1.2022, "step": 263 },
    { "epoch": 0.12, "grad_norm": 0.33496084809303284, "learning_rate": 0.0001479396984924623, "loss": 1.302, "step": 264 },
    { "epoch": 0.12, "grad_norm": 0.35757845640182495, "learning_rate": 0.00014773869346733668, "loss": 1.2574, "step": 265 },
    { "epoch": 0.12, "grad_norm": 0.332405686378479, "learning_rate": 0.00014753768844221106, "loss": 1.2773, "step": 266 },
    { "epoch": 0.12, "grad_norm": 0.32756730914115906, "learning_rate": 0.00014733668341708543, "loss": 1.2189, "step": 267 },
    { "epoch": 0.12, "grad_norm": 0.3382038176059723, "learning_rate": 0.0001471356783919598, "loss": 1.2143, "step": 268 },
    { "epoch": 0.12, "grad_norm": 0.35607925057411194, "learning_rate": 0.00014693467336683417, "loss": 1.3101, "step": 269 },
    { "epoch": 0.12, "grad_norm": 0.3490254282951355, "learning_rate": 0.00014673366834170855, "loss": 1.2813, "step": 270 },
    { "epoch": 0.12, "grad_norm": 0.34010350704193115, "learning_rate": 0.00014653266331658292, "loss": 1.1883, "step": 271 },
    { "epoch": 0.12, "grad_norm": 0.33997610211372375, "learning_rate": 0.0001463316582914573, "loss": 1.2936, "step": 272 },
    { "epoch": 0.12, "grad_norm": 0.3269011378288269, "learning_rate": 0.00014613065326633167, "loss": 1.2682, "step": 273 },
    { "epoch": 0.13, "grad_norm": 0.34441789984703064, "learning_rate": 0.00014592964824120604, "loss": 1.2556, "step": 274 },
    { "epoch": 0.13, "grad_norm": 0.3339982330799103, "learning_rate": 0.0001457286432160804, "loss": 1.3609, "step": 275 },
    { "epoch": 0.13, "grad_norm": 0.3199785649776459, "learning_rate": 0.00014552763819095479, "loss": 1.276, "step": 276 },
    { "epoch": 0.13, "grad_norm": 0.33970314264297485, "learning_rate": 0.00014532663316582916, "loss": 1.2971, "step": 277 },
    { "epoch": 0.13, "grad_norm": 0.3045497536659241, "learning_rate": 0.00014512562814070353, "loss": 1.2067, "step": 278 },
    { "epoch": 0.13, "grad_norm": 0.334547221660614, "learning_rate": 0.0001449246231155779, "loss": 1.2418, "step": 279 },
    { "epoch": 0.13, "grad_norm": 0.32451215386390686, "learning_rate": 0.00014472361809045228, "loss": 1.3319, "step": 280 },
    { "epoch": 0.13, "grad_norm": 0.33704888820648193, "learning_rate": 0.00014452261306532665, "loss": 1.3065, "step": 281 },
    { "epoch": 0.13, "grad_norm": 0.3138297200202942, "learning_rate": 0.000144321608040201, "loss": 1.1864, "step": 282 },
    { "epoch": 0.13, "grad_norm": 0.31389278173446655, "learning_rate": 0.00014412060301507537, "loss": 1.2409, "step": 283 },
    { "epoch": 0.13, "grad_norm": 0.33956632018089294, "learning_rate": 0.00014391959798994977, "loss": 1.2591, "step": 284 },
    { "epoch": 0.13, "grad_norm": 0.3188384473323822, "learning_rate": 0.00014371859296482411, "loss": 1.232, "step": 285 },
    { "epoch": 0.13, "grad_norm": 0.36883220076560974, "learning_rate": 0.0001435175879396985, "loss": 1.2569, "step": 286 },
    { "epoch": 0.13, "grad_norm": 0.3178212344646454, "learning_rate": 0.00014331658291457286, "loss": 1.2337, "step": 287 },
    { "epoch": 0.13, "grad_norm": 0.33480581641197205, "learning_rate": 0.00014311557788944726, "loss": 1.2784, "step": 288 },
    { "epoch": 0.13, "grad_norm": 0.3228307366371155, "learning_rate": 0.0001429145728643216, "loss": 1.2186, "step": 289 },
    { "epoch": 0.13, "grad_norm": 0.3270285427570343, "learning_rate": 0.00014271356783919598, "loss": 1.252, "step": 290 },
    { "epoch": 0.13, "grad_norm": 0.32656142115592957, "learning_rate": 0.00014251256281407035, "loss": 1.2598, "step": 291 },
    { "epoch": 0.13, "grad_norm": 0.3287805914878845, "learning_rate": 0.00014231155778894473, "loss": 1.2528, "step": 292 },
    { "epoch": 0.13, "grad_norm": 0.351793110370636, "learning_rate": 0.0001421105527638191, "loss": 1.1774, "step": 293 },
    { "epoch": 0.13, "grad_norm": 0.334957093000412, "learning_rate": 0.00014190954773869347, "loss": 1.2204, "step": 294 },
    { "epoch": 0.13, "grad_norm": 0.3303321897983551, "learning_rate": 0.00014170854271356784, "loss": 1.2792, "step": 295 },
    { "epoch": 0.14, "grad_norm": 0.325514018535614, "learning_rate": 0.00014150753768844222, "loss": 1.2095, "step": 296 },
    { "epoch": 0.14, "grad_norm": 0.32064923644065857, "learning_rate": 0.0001413065326633166, "loss": 1.2369, "step": 297 },
    { "epoch": 0.14, "grad_norm": 0.3173045516014099, "learning_rate": 0.00014110552763819096, "loss": 1.2092, "step": 298 },
    { "epoch": 0.14, "grad_norm": 0.3447834551334381, "learning_rate": 0.00014090452261306534, "loss": 1.3058, "step": 299 },
    { "epoch": 0.14, "grad_norm": 0.355277419090271, "learning_rate": 0.0001407035175879397, "loss": 1.2147, "step": 300 },
    { "epoch": 0.14, "grad_norm": 0.321415513753891, "learning_rate": 0.00014050251256281408, "loss": 1.3128, "step": 301 },
    { "epoch": 0.14, "grad_norm": 0.316572904586792, "learning_rate": 0.00014030150753768846, "loss": 1.2563, "step": 302 },
    { "epoch": 0.14, "grad_norm": 0.35804814100265503, "learning_rate": 0.0001401005025125628, "loss": 1.3382, "step": 303 },
    { "epoch": 0.14, "grad_norm": 0.32747843861579895, "learning_rate": 0.0001398994974874372, "loss": 1.198, "step": 304 },
    { "epoch": 0.14, "grad_norm": 0.35342931747436523, "learning_rate": 0.00013969849246231157, "loss": 1.2672, "step": 305 },
    { "epoch": 0.14, "grad_norm": 0.32692718505859375, "learning_rate": 0.00013949748743718595, "loss": 1.2641, "step": 306 },
    { "epoch": 0.14, "grad_norm": 0.31664589047431946, "learning_rate": 0.0001392964824120603, "loss": 1.2382, "step": 307 },
    { "epoch": 0.14, "grad_norm": 0.33936336636543274, "learning_rate": 0.00013909547738693467, "loss": 1.2823, "step": 308 },
    { "epoch": 0.14, "grad_norm": 0.342006117105484, "learning_rate": 0.00013889447236180907, "loss": 1.2712, "step": 309 },
    { "epoch": 0.14, "grad_norm": 0.31698647141456604, "learning_rate": 0.0001386934673366834, "loss": 1.2551, "step": 310 },
    { "epoch": 0.14, "grad_norm": 0.32440513372421265, "learning_rate": 0.00013849246231155778, "loss": 1.2512, "step": 311 },
    { "epoch": 0.14, "grad_norm": 0.3394576907157898, "learning_rate": 0.00013829145728643216, "loss": 1.3612, "step": 312 },
    { "epoch": 0.14, "grad_norm": 0.3272732198238373, "learning_rate": 0.00013809045226130656, "loss": 1.1537, "step": 313 },
    { "epoch": 0.14, "grad_norm": 0.33435794711112976, "learning_rate": 0.0001378894472361809, "loss": 1.2832, "step": 314 },
    { "epoch": 0.14, "grad_norm": 0.3546105921268463, "learning_rate": 0.00013768844221105528, "loss": 1.2398, "step": 315 },
    { "epoch": 0.14, "grad_norm": 0.3501565754413605, "learning_rate": 0.00013748743718592965, "loss": 1.2111, "step": 316 },
    { "epoch": 0.14, "grad_norm": 0.35097482800483704, "learning_rate": 0.00013728643216080402, "loss": 1.3033, "step": 317 },
    { "epoch": 0.15, "grad_norm": 0.3313996195793152, "learning_rate": 0.0001370854271356784, "loss": 1.2819, "step": 318 },
    { "epoch": 0.15, "grad_norm": 0.32861942052841187, "learning_rate": 0.00013688442211055277, "loss": 1.2566, "step": 319 },
    { "epoch": 0.15, "grad_norm": 0.33757033944129944, "learning_rate": 0.00013668341708542714, "loss": 1.2951, "step": 320 },
    { "epoch": 0.15, "grad_norm": 0.33206436038017273, "learning_rate": 0.00013648241206030151, "loss": 1.1828, "step": 321 },
    { "epoch": 0.15, "grad_norm": 0.34780171513557434, "learning_rate": 0.0001362814070351759, "loss": 1.2766, "step": 322 },
    { "epoch": 0.15, "grad_norm": 0.3237352669239044, "learning_rate": 0.00013608040201005026, "loss": 1.2885, "step": 323 },
    { "epoch": 0.15, "grad_norm": 0.32298171520233154, "learning_rate": 0.00013587939698492463, "loss": 1.1687, "step": 324 },
    { "epoch": 0.15, "grad_norm": 0.3167840540409088, "learning_rate": 0.000135678391959799, "loss": 1.2617, "step": 325 },
    { "epoch": 0.15, "grad_norm": 0.3790431618690491, "learning_rate": 0.00013547738693467338, "loss": 1.238, "step": 326 },
    { "epoch": 0.15, "grad_norm": 0.34648290276527405, "learning_rate": 0.00013527638190954775, "loss": 1.2802, "step": 327 },
    { "epoch": 0.15, "grad_norm": 0.32841557264328003, "learning_rate": 0.0001350753768844221, "loss": 1.2493, "step": 328 },
    { "epoch": 0.15, "grad_norm": 0.3556242287158966, "learning_rate": 0.00013487437185929647, "loss": 1.2269, "step": 329 },
    { "epoch": 0.15, "grad_norm": 0.38643625378608704, "learning_rate": 0.00013467336683417087, "loss": 1.3036, "step": 330 },
    { "epoch": 0.15, "grad_norm": 0.33449631929397583, "learning_rate": 0.00013447236180904524, "loss": 1.2595, "step": 331 },
    { "epoch": 0.15, "grad_norm": 0.33076462149620056, "learning_rate": 0.0001342713567839196, "loss": 1.2376, "step": 332 },
    { "epoch": 0.15, "grad_norm": 0.3205597698688507, "learning_rate": 0.00013407035175879396, "loss": 1.2323, "step": 333 },
    { "epoch": 0.15, "grad_norm": 0.31788671016693115, "learning_rate": 0.00013386934673366836, "loss": 1.2318, "step": 334 },
    { "epoch": 0.15, "grad_norm": 0.3543769419193268, "learning_rate": 0.0001336683417085427, "loss": 1.2916, "step": 335 },
    { "epoch": 0.15, "grad_norm": 0.31310132145881653, "learning_rate": 0.00013346733668341708, "loss": 1.1925, "step": 336 },
    { "epoch": 0.15, "grad_norm": 0.3153649866580963, "learning_rate": 0.00013326633165829146, "loss": 1.282, "step": 337 },
    { "epoch": 0.15, "grad_norm": 0.33127865195274353, "learning_rate": 0.00013306532663316586, "loss": 1.2753, "step": 338 },
    { "epoch": 0.15, "grad_norm": 0.36392638087272644, "learning_rate": 0.0001328643216080402, "loss": 1.2895, "step": 339 },
    { "epoch": 0.16, "grad_norm": 0.33100610971450806, "learning_rate": 0.00013266331658291457, "loss": 1.2334, "step": 340 },
    { "epoch": 0.16, "grad_norm": 0.3381032347679138, "learning_rate": 0.00013246231155778895, "loss": 1.2819, "step": 341 },
    { "epoch": 0.16, "grad_norm": 0.31431353092193604, "learning_rate": 0.00013226130653266332, "loss": 1.2468, "step": 342 },
    { "epoch": 0.16, "grad_norm": 0.31695157289505005, "learning_rate": 0.0001320603015075377, "loss": 1.2349, "step": 343 },
    { "epoch": 0.16, "grad_norm": 0.3341975808143616, "learning_rate": 0.00013185929648241207, "loss": 1.266, "step": 344 },
    { "epoch": 0.16, "grad_norm": 0.32825469970703125, "learning_rate": 0.00013165829145728644, "loss": 1.28, "step": 345 },
    { "epoch": 0.16, "grad_norm": 0.3523416817188263, "learning_rate": 0.0001314572864321608, "loss": 1.1085, "step": 346 },
    { "epoch": 0.16, "grad_norm": 0.3559586703777313, "learning_rate": 0.00013125628140703518, "loss": 1.3506, "step": 347 },
    { "epoch": 0.16, "grad_norm": 0.33866238594055176, "learning_rate": 0.00013105527638190956, "loss": 1.1631, "step": 348 },
    { "epoch": 0.16, "grad_norm": 0.3517313301563263, "learning_rate": 0.00013085427135678393, "loss": 1.2348, "step": 349 },
    { "epoch": 0.16, "grad_norm": 0.3219257891178131, "learning_rate": 0.0001306532663316583, "loss": 1.1565, "step": 350 },
    { "epoch": 0.16, "grad_norm": 0.3182775676250458, "learning_rate": 0.00013045226130653268, "loss": 1.3332, "step": 351 },
    { "epoch": 0.16, "grad_norm": 0.33609387278556824, "learning_rate": 0.00013025125628140705, "loss": 1.2197, "step": 352 },
    { "epoch": 0.16, "grad_norm": 0.3392961621284485, "learning_rate": 0.0001300502512562814, "loss": 1.3303, "step": 353 },
    { "epoch": 0.16, "grad_norm": 0.3233344256877899, "learning_rate": 0.00012984924623115577, "loss": 1.1869, "step": 354 },
    { "epoch": 0.16, "grad_norm": 0.3254396617412567, "learning_rate": 0.00012964824120603017, "loss": 1.2693, "step": 355 },
    { "epoch": 0.16, "grad_norm": 0.3262885510921478, "learning_rate": 0.00012944723618090454, "loss": 1.2837, "step": 356 },
    { "epoch": 0.16, "grad_norm": 0.3186768889427185, "learning_rate": 0.0001292462311557789, "loss": 1.2705, "step": 357 },
    { "epoch": 0.16, "grad_norm": 0.3470524251461029, "learning_rate": 0.00012904522613065326, "loss": 1.2626, "step": 358 },
    { "epoch": 0.16, "grad_norm": 0.31644207239151, "learning_rate": 0.00012884422110552766, "loss": 1.2217, "step": 359 },
    { "epoch": 0.16, "grad_norm": 0.3402186930179596, "learning_rate": 0.000128643216080402, "loss": 1.2456, "step": 360 },
    { "epoch": 0.17, "grad_norm": 0.32568660378456116, "learning_rate": 0.00012844221105527638, "loss": 1.3024, "step": 361 },
    { "epoch": 0.17, "grad_norm": 0.31550562381744385, "learning_rate": 0.00012824120603015075, "loss": 1.2512, "step": 362 },
    { "epoch": 0.17, "grad_norm": 0.3515610098838806, "learning_rate": 0.00012804020100502515, "loss": 1.2323, "step": 363 },
    { "epoch": 0.17, "grad_norm": 0.3370158076286316, "learning_rate": 0.0001278391959798995, "loss": 1.3072, "step": 364 },
    { "epoch": 0.17, "grad_norm": 0.3440285623073578, "learning_rate": 0.00012763819095477387, "loss": 1.2268, "step": 365 },
    { "epoch": 0.17, "grad_norm": 0.37662672996520996, "learning_rate": 0.00012743718592964824, "loss": 1.2495, "step": 366 },
    { "epoch": 0.17, "grad_norm": 0.32195794582366943, "learning_rate": 0.00012723618090452262, "loss": 1.2977, "step": 367 },
    { "epoch": 0.17, "grad_norm": 0.3311251103878021, "learning_rate": 0.000127035175879397, "loss": 1.3372, "step": 368 },
    { "epoch": 0.17, "grad_norm": 0.3319653272628784, "learning_rate": 0.00012683417085427136, "loss": 1.2813, "step": 369 },
    { "epoch": 0.17, "grad_norm": 0.3444850742816925, "learning_rate": 0.00012663316582914574, "loss": 1.2877, "step": 370 },
    { "epoch": 0.17, "grad_norm": 0.3433425724506378, "learning_rate": 0.0001264321608040201, "loss": 1.2696, "step": 371 },
    { "epoch": 0.17, "grad_norm": 0.35098111629486084, "learning_rate": 0.00012623115577889448, "loss": 1.28, "step": 372 },
    { "epoch": 0.17, "grad_norm": 0.3203146457672119, "learning_rate": 0.00012603015075376885, "loss": 1.2533, "step": 373 },
    { "epoch": 0.17, "grad_norm": 0.30470582842826843, "learning_rate": 0.00012582914572864323, "loss": 1.2678, "step": 374 },
    { "epoch": 0.17, "grad_norm": 0.324220210313797, "learning_rate": 0.0001256281407035176, "loss": 1.1909, "step": 375 },
    { "epoch": 0.17, "grad_norm": 0.3313479423522949, "learning_rate": 0.00012542713567839197, "loss": 1.2528, "step": 376 },
    { "epoch": 0.17, "grad_norm": 0.31879833340644836, "learning_rate": 0.00012522613065326635, "loss": 1.1431, "step": 377 },
    { "epoch": 0.17, "grad_norm": 0.3483116626739502, "learning_rate": 0.0001250251256281407, "loss": 1.1517, "step": 378 },
    { "epoch": 0.17, "grad_norm": 0.3220193684101105, "learning_rate": 0.00012482412060301507, "loss": 1.2532, "step": 379 },
    { "epoch": 0.17, "grad_norm": 0.3391655683517456, "learning_rate": 0.00012462311557788947, "loss": 1.2565, "step": 380 },
    { "epoch": 0.17, "grad_norm": 0.3446550667285919, "learning_rate": 0.00012442211055276384, "loss": 1.2253, "step": 381 },
    { "epoch": 0.17, "grad_norm": 0.3528743386268616, "learning_rate": 0.00012422110552763818, "loss": 1.2919, "step": 382 },
    { "epoch": 0.18, "grad_norm": 0.32574883103370667, "learning_rate": 0.00012402010050251256, "loss": 1.2885, "step": 383 },
    { "epoch": 0.18, "grad_norm": 0.3145955502986908, "learning_rate": 0.00012381909547738696, "loss": 1.2307, "step": 384 },
    { "epoch": 0.18, "grad_norm": 0.3239680230617523, "learning_rate": 0.0001236180904522613, "loss": 1.2612, "step": 385 },
    { "epoch": 0.18, "grad_norm": 0.3375207185745239, "learning_rate": 0.00012341708542713568, "loss": 1.274, "step": 386 },
    { "epoch": 0.18, "grad_norm": 0.3346465528011322, "learning_rate": 0.00012321608040201005, "loss": 1.2074, "step": 387 },
    { "epoch": 0.18, "grad_norm": 0.3280505836009979, "learning_rate": 0.00012301507537688445, "loss": 1.2776, "step": 388 },
    { "epoch": 0.18, "grad_norm": 0.3411586880683899, "learning_rate": 0.0001228140703517588, "loss": 1.3012, "step": 389 },
    { "epoch": 0.18, "grad_norm": 0.32394883036613464, "learning_rate": 0.00012261306532663317, "loss": 1.3018, "step": 390 },
    { "epoch": 0.18, "grad_norm": 0.34626421332359314, "learning_rate": 0.00012241206030150754, "loss": 1.2882, "step": 391 },
    { "epoch": 0.18, "grad_norm": 0.3305688500404358, "learning_rate": 0.00012221105527638191, "loss": 1.2484, "step": 392 },
    { "epoch": 0.18, "grad_norm": 0.33277568221092224, "learning_rate": 0.00012201005025125629, "loss": 1.2088, "step": 393 },
    { "epoch": 0.18, "grad_norm": 0.3431893289089203, "learning_rate": 0.00012180904522613066, "loss": 1.1882, "step": 394 },
    { "epoch": 0.18, "grad_norm": 0.3225569725036621, "learning_rate": 0.00012160804020100502, "loss": 1.2162, "step": 395 },
    { "epoch": 0.18, "grad_norm": 0.33999207615852356, "learning_rate": 0.00012140703517587942, "loss": 1.2532, "step": 396 },
    { "epoch": 0.18, "grad_norm": 0.3457259237766266, "learning_rate": 0.00012120603015075378, "loss": 1.2045, "step": 397 },
    { "epoch": 0.18, "grad_norm": 0.35479119420051575, "learning_rate": 0.00012100502512562815, "loss": 1.2475, "step": 398 },
    { "epoch": 0.18, "grad_norm": 0.34909576177597046, "learning_rate": 0.00012080402010050251, "loss": 1.2581, "step": 399 },
    { "epoch": 0.18, "grad_norm": 0.33657559752464294, "learning_rate": 0.00012060301507537688, "loss": 1.1876, "step": 400 },
    { "epoch": 0.18, "grad_norm": 0.33252567052841187, "learning_rate": 0.00012040201005025127, "loss": 1.2108, "step": 401 },
    { "epoch": 0.18, "grad_norm": 0.35932156443595886, "learning_rate": 0.00012020100502512563, "loss": 1.25, "step": 402 },
    { "epoch": 0.18, "grad_norm": 0.3509422242641449, "learning_rate": 0.00012, "loss": 1.2748, "step": 403 },
    { "epoch": 0.18,
"grad_norm": 0.3509500324726105, |
|
"learning_rate": 0.00011979899497487436, |
|
"loss": 1.2704, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.32239145040512085, |
|
"learning_rate": 0.00011959798994974876, |
|
"loss": 1.2476, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.33603423833847046, |
|
"learning_rate": 0.00011939698492462312, |
|
"loss": 1.278, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.3381786346435547, |
|
"learning_rate": 0.0001191959798994975, |
|
"loss": 1.2382, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.31310775876045227, |
|
"learning_rate": 0.00011899497487437185, |
|
"loss": 1.2827, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.3387271463871002, |
|
"learning_rate": 0.00011879396984924624, |
|
"loss": 1.2891, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.3353903293609619, |
|
"learning_rate": 0.00011859296482412061, |
|
"loss": 1.326, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.322992742061615, |
|
"learning_rate": 0.00011839195979899497, |
|
"loss": 1.2513, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.3425077199935913, |
|
"learning_rate": 0.00011819095477386935, |
|
"loss": 1.1482, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.3305937647819519, |
|
"learning_rate": 0.00011798994974874373, |
|
"loss": 1.1919, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.3408913016319275, |
|
"learning_rate": 0.0001177889447236181, |
|
"loss": 1.2535, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.43716689944267273, |
|
"learning_rate": 0.00011758793969849247, |
|
"loss": 1.33, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.34090203046798706, |
|
"learning_rate": 0.00011738693467336684, |
|
"loss": 1.1666, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.35914671421051025, |
|
"learning_rate": 0.00011718592964824122, |
|
"loss": 1.2683, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.3459693193435669, |
|
"learning_rate": 0.00011698492462311558, |
|
"loss": 1.2502, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.3254222273826599, |
|
"learning_rate": 0.00011678391959798996, |
|
"loss": 1.2369, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.33233174681663513, |
|
"learning_rate": 0.00011658291457286432, |
|
"loss": 1.1769, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.3394586145877838, |
|
"learning_rate": 0.00011638190954773872, |
|
"loss": 1.1441, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.3482055962085724, |
|
"learning_rate": 0.00011618090452261308, |
|
"loss": 1.1411, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.33943256735801697, |
|
"learning_rate": 0.00011597989949748745, |
|
"loss": 1.274, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.34545761346817017, |
|
"learning_rate": 0.00011577889447236181, |
|
"loss": 1.1839, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.3279217481613159, |
|
"learning_rate": 0.00011557788944723618, |
|
"loss": 1.2555, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 0.3297037184238434, |
|
"learning_rate": 0.00011537688442211057, |
|
"loss": 1.2645, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.3765062391757965, |
|
"learning_rate": 0.00011517587939698493, |
|
"loss": 1.2166, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.34099629521369934, |
|
"learning_rate": 0.0001149748743718593, |
|
"loss": 1.1985, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.3941348195075989, |
|
"learning_rate": 0.00011477386934673366, |
|
"loss": 1.2124, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.3421550989151001, |
|
"learning_rate": 0.00011457286432160806, |
|
"loss": 1.2202, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.3587627708911896, |
|
"learning_rate": 0.00011437185929648242, |
|
"loss": 1.2119, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.318024605512619, |
|
"learning_rate": 0.00011417085427135679, |
|
"loss": 1.1717, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.3441738486289978, |
|
"learning_rate": 0.00011396984924623115, |
|
"loss": 1.2637, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.32831835746765137, |
|
"learning_rate": 0.00011376884422110554, |
|
"loss": 1.1351, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.40580299496650696, |
|
"learning_rate": 0.00011356783919597991, |
|
"loss": 1.2777, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.3455897867679596, |
|
"learning_rate": 0.00011336683417085427, |
|
"loss": 1.2711, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.34554949402809143, |
|
"learning_rate": 0.00011316582914572864, |
|
"loss": 1.2605, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.333046019077301, |
|
"learning_rate": 0.00011296482412060303, |
|
"loss": 1.1934, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.37090611457824707, |
|
"learning_rate": 0.0001127638190954774, |
|
"loss": 1.2289, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.3464914560317993, |
|
"learning_rate": 0.00011256281407035176, |
|
"loss": 1.2573, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.3522966504096985, |
|
"learning_rate": 0.00011236180904522614, |
|
"loss": 1.233, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.34124764800071716, |
|
"learning_rate": 0.00011216080402010052, |
|
"loss": 1.2167, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.32103344798088074, |
|
"learning_rate": 0.00011195979899497488, |
|
"loss": 1.2823, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.35400694608688354, |
|
"learning_rate": 0.00011175879396984925, |
|
"loss": 1.275, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.32526448369026184, |
|
"learning_rate": 0.00011155778894472361, |
|
"loss": 1.2298, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.32495397329330444, |
|
"learning_rate": 0.00011135678391959799, |
|
"loss": 1.2328, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.33901894092559814, |
|
"learning_rate": 0.00011115577889447237, |
|
"loss": 1.2864, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 0.3219538927078247, |
|
"learning_rate": 0.00011095477386934675, |
|
"loss": 1.1061, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.3175276219844818, |
|
"learning_rate": 0.0001107537688442211, |
|
"loss": 1.2714, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.32311904430389404, |
|
"learning_rate": 0.00011055276381909548, |
|
"loss": 1.2635, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.3252653181552887, |
|
"learning_rate": 0.00011035175879396986, |
|
"loss": 1.2694, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.337410032749176, |
|
"learning_rate": 0.00011015075376884422, |
|
"loss": 1.2197, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.34929850697517395, |
|
"learning_rate": 0.0001099497487437186, |
|
"loss": 1.2773, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.3558543622493744, |
|
"learning_rate": 0.00010974874371859296, |
|
"loss": 1.2673, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.309593141078949, |
|
"learning_rate": 0.00010954773869346736, |
|
"loss": 1.2945, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.30904704332351685, |
|
"learning_rate": 0.00010934673366834172, |
|
"loss": 1.2362, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.3560062646865845, |
|
"learning_rate": 0.00010914572864321609, |
|
"loss": 1.2874, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.3410942554473877, |
|
"learning_rate": 0.00010894472361809045, |
|
"loss": 1.2621, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.33610406517982483, |
|
"learning_rate": 0.00010874371859296483, |
|
"loss": 1.2572, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.3685830235481262, |
|
"learning_rate": 0.00010854271356783921, |
|
"loss": 1.292, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.3263039290904999, |
|
"learning_rate": 0.00010834170854271357, |
|
"loss": 1.1467, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.33784759044647217, |
|
"learning_rate": 0.00010814070351758794, |
|
"loss": 1.2536, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.3310985565185547, |
|
"learning_rate": 0.00010793969849246233, |
|
"loss": 1.2865, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.3608328104019165, |
|
"learning_rate": 0.0001077386934673367, |
|
"loss": 1.3144, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.3107350468635559, |
|
"learning_rate": 0.00010753768844221106, |
|
"loss": 1.1831, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.3376270532608032, |
|
"learning_rate": 0.00010733668341708543, |
|
"loss": 1.2699, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.34757518768310547, |
|
"learning_rate": 0.00010713567839195982, |
|
"loss": 1.3423, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.3217342495918274, |
|
"learning_rate": 0.00010693467336683418, |
|
"loss": 1.2629, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.3594968020915985, |
|
"learning_rate": 0.00010673366834170855, |
|
"loss": 1.2613, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 0.34216034412384033, |
|
"learning_rate": 0.00010653266331658291, |
|
"loss": 1.2807, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.33661434054374695, |
|
"learning_rate": 0.00010633165829145728, |
|
"loss": 1.2175, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.3459634780883789, |
|
"learning_rate": 0.00010613065326633167, |
|
"loss": 1.2234, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.31939029693603516, |
|
"learning_rate": 0.00010592964824120604, |
|
"loss": 1.2566, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.3308617174625397, |
|
"learning_rate": 0.0001057286432160804, |
|
"loss": 1.2249, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.3457432985305786, |
|
"learning_rate": 0.00010552763819095478, |
|
"loss": 1.3377, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.3623298704624176, |
|
"learning_rate": 0.00010532663316582916, |
|
"loss": 1.295, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.3287794888019562, |
|
"learning_rate": 0.00010512562814070352, |
|
"loss": 1.3167, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.32969963550567627, |
|
"learning_rate": 0.0001049246231155779, |
|
"loss": 1.1917, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.3520050346851349, |
|
"learning_rate": 0.00010472361809045225, |
|
"loss": 1.2786, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.33835569024086, |
|
"learning_rate": 0.00010452261306532664, |
|
"loss": 1.2734, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.32975468039512634, |
|
"learning_rate": 0.00010432160804020101, |
|
"loss": 1.2263, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.3600429594516754, |
|
"learning_rate": 0.00010412060301507539, |
|
"loss": 1.308, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.35504522919654846, |
|
"learning_rate": 0.00010391959798994975, |
|
"loss": 1.3255, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.334204763174057, |
|
"learning_rate": 0.00010371859296482413, |
|
"loss": 1.2029, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.32885733246803284, |
|
"learning_rate": 0.0001035175879396985, |
|
"loss": 1.258, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.3293534815311432, |
|
"learning_rate": 0.00010331658291457286, |
|
"loss": 1.2438, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.3399008810520172, |
|
"learning_rate": 0.00010311557788944724, |
|
"loss": 1.2508, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.3626408576965332, |
|
"learning_rate": 0.00010291457286432162, |
|
"loss": 1.2555, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.3435054123401642, |
|
"learning_rate": 0.00010271356783919598, |
|
"loss": 1.1557, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.35252466797828674, |
|
"learning_rate": 0.00010251256281407036, |
|
"loss": 1.2756, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.3346278667449951, |
|
"learning_rate": 0.00010231155778894473, |
|
"loss": 1.2722, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 0.33955395221710205, |
|
"learning_rate": 0.00010211055276381909, |
|
"loss": 1.2445, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.33230292797088623, |
|
"learning_rate": 0.00010190954773869348, |
|
"loss": 1.2836, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.32656341791152954, |
|
"learning_rate": 0.00010170854271356785, |
|
"loss": 1.2414, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3479287922382355, |
|
"learning_rate": 0.00010150753768844221, |
|
"loss": 1.2442, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3435857892036438, |
|
"learning_rate": 0.00010130653266331658, |
|
"loss": 1.2344, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3324833810329437, |
|
"learning_rate": 0.00010110552763819097, |
|
"loss": 1.2261, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3612833023071289, |
|
"learning_rate": 0.00010090452261306533, |
|
"loss": 1.3509, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3381580710411072, |
|
"learning_rate": 0.0001007035175879397, |
|
"loss": 1.2508, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3283715546131134, |
|
"learning_rate": 0.00010050251256281407, |
|
"loss": 1.223, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3245822787284851, |
|
"learning_rate": 0.00010030150753768846, |
|
"loss": 1.214, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.33582326769828796, |
|
"learning_rate": 0.00010010050251256282, |
|
"loss": 1.1891, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.34178048372268677, |
|
"learning_rate": 9.989949748743719e-05, |
|
"loss": 1.2328, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3429703414440155, |
|
"learning_rate": 9.969849246231156e-05, |
|
"loss": 1.243, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3459545969963074, |
|
"learning_rate": 9.949748743718594e-05, |
|
"loss": 1.2272, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.32898616790771484, |
|
"learning_rate": 9.929648241206031e-05, |
|
"loss": 1.2651, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.33411911129951477, |
|
"learning_rate": 9.909547738693468e-05, |
|
"loss": 1.2215, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3196270763874054, |
|
"learning_rate": 9.889447236180906e-05, |
|
"loss": 1.2338, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3360273241996765, |
|
"learning_rate": 9.869346733668342e-05, |
|
"loss": 1.1259, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3572694957256317, |
|
"learning_rate": 9.84924623115578e-05, |
|
"loss": 1.2741, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3491540849208832, |
|
"learning_rate": 9.829145728643216e-05, |
|
"loss": 1.2844, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3704037666320801, |
|
"learning_rate": 9.809045226130655e-05, |
|
"loss": 1.2974, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3397068381309509, |
|
"learning_rate": 9.788944723618091e-05, |
|
"loss": 1.2155, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 0.3439743220806122, |
|
"learning_rate": 9.768844221105528e-05, |
|
"loss": 1.2449, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.3374169170856476, |
|
"learning_rate": 9.748743718592965e-05, |
|
"loss": 1.1984, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.3484777510166168, |
|
"learning_rate": 9.728643216080403e-05, |
|
"loss": 1.2707, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.34569051861763, |
|
"learning_rate": 9.70854271356784e-05, |
|
"loss": 1.2669, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.3295353949069977, |
|
"learning_rate": 9.688442211055276e-05, |
|
"loss": 1.3073, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.3298560380935669, |
|
"learning_rate": 9.668341708542715e-05, |
|
"loss": 1.1989, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.342427521944046, |
|
"learning_rate": 9.64824120603015e-05, |
|
"loss": 1.2467, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.34153419733047485, |
|
"learning_rate": 9.628140703517589e-05, |
|
"loss": 1.1852, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.33842480182647705, |
|
"learning_rate": 9.608040201005025e-05, |
|
"loss": 1.277, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.3495989739894867, |
|
"learning_rate": 9.587939698492462e-05, |
|
"loss": 1.2363, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.34229379892349243, |
|
"learning_rate": 9.5678391959799e-05, |
|
"loss": 1.2327, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.3576783835887909, |
|
"learning_rate": 9.547738693467337e-05, |
|
"loss": 1.2011, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.3553127646446228, |
|
"learning_rate": 9.527638190954774e-05, |
|
"loss": 1.2718, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.34771883487701416, |
|
"learning_rate": 9.507537688442212e-05, |
|
"loss": 1.2399, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.34981489181518555, |
|
"learning_rate": 9.487437185929649e-05, |
|
"loss": 1.2305, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.35138848423957825, |
|
"learning_rate": 9.467336683417086e-05, |
|
"loss": 1.3227, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.32845598459243774, |
|
"learning_rate": 9.447236180904523e-05, |
|
"loss": 1.3256, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.35754063725471497, |
|
"learning_rate": 9.427135678391961e-05, |
|
"loss": 1.28, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.3293386697769165, |
|
"learning_rate": 9.407035175879397e-05, |
|
"loss": 1.2435, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.32942119240760803, |
|
"learning_rate": 9.386934673366835e-05, |
|
"loss": 1.2703, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.36065587401390076, |
|
"learning_rate": 9.366834170854271e-05, |
|
"loss": 1.1724, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 0.32289040088653564, |
|
"learning_rate": 9.34673366834171e-05, |
|
"loss": 1.2722, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3467089533805847, |
|
"learning_rate": 9.326633165829146e-05, |
|
"loss": 1.2233, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3310029208660126, |
|
"learning_rate": 9.306532663316585e-05, |
|
"loss": 1.2507, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3369174301624298, |
|
"learning_rate": 9.28643216080402e-05, |
|
"loss": 1.2127, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.34335601329803467, |
|
"learning_rate": 9.266331658291458e-05, |
|
"loss": 1.2038, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3327116072177887, |
|
"learning_rate": 9.246231155778895e-05, |
|
"loss": 1.3228, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3617047071456909, |
|
"learning_rate": 9.226130653266331e-05, |
|
"loss": 1.3661, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3532153069972992, |
|
"learning_rate": 9.20603015075377e-05, |
|
"loss": 1.2159, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3590898811817169, |
|
"learning_rate": 9.185929648241206e-05, |
|
"loss": 1.2507, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3496554493904114, |
|
"learning_rate": 9.165829145728644e-05, |
|
"loss": 1.2163, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.34077826142311096, |
|
"learning_rate": 9.14572864321608e-05, |
|
"loss": 1.2127, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.33904626965522766, |
|
"learning_rate": 9.125628140703519e-05, |
|
"loss": 1.1938, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.34727028012275696, |
|
"learning_rate": 9.105527638190955e-05, |
|
"loss": 1.2685, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3311704397201538, |
|
"learning_rate": 9.085427135678392e-05, |
|
"loss": 1.2014, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3330560028553009, |
|
"learning_rate": 9.06532663316583e-05, |
|
"loss": 1.2679, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3092857003211975, |
|
"learning_rate": 9.045226130653267e-05, |
|
"loss": 1.1688, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3591139018535614, |
|
"learning_rate": 9.025125628140704e-05, |
|
"loss": 1.1914, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3442589044570923, |
|
"learning_rate": 9.005025125628141e-05, |
|
"loss": 1.2051, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.35013848543167114, |
|
"learning_rate": 8.984924623115579e-05, |
|
"loss": 1.2408, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3634118139743805, |
|
"learning_rate": 8.964824120603016e-05, |
|
"loss": 1.2016, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.32844385504722595, |
|
"learning_rate": 8.944723618090453e-05, |
|
"loss": 1.186, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.3430251181125641, |
|
"learning_rate": 8.92462311557789e-05, |
|
"loss": 1.3732, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 0.32605141401290894, |
|
"learning_rate": 8.904522613065326e-05, |
|
"loss": 1.2225, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.3491004407405853, |
|
"learning_rate": 8.884422110552765e-05, |
|
"loss": 1.277, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.34751296043395996, |
|
"learning_rate": 8.864321608040201e-05, |
|
"loss": 1.2069, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.31487154960632324, |
|
"learning_rate": 8.84422110552764e-05, |
|
"loss": 1.1973, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.33653488755226135, |
|
"learning_rate": 8.824120603015076e-05, |
|
"loss": 1.3103, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.3140471279621124, |
|
"learning_rate": 8.804020100502513e-05, |
|
"loss": 1.1919, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.37627485394477844, |
|
"learning_rate": 8.78391959798995e-05, |
|
"loss": 1.2185, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.34721139073371887, |
|
"learning_rate": 8.763819095477387e-05, |
|
"loss": 1.2937, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.31878143548965454, |
|
"learning_rate": 8.743718592964825e-05, |
|
"loss": 1.1966, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.3476640284061432, |
|
"learning_rate": 8.723618090452261e-05, |
|
"loss": 1.1035, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.32448452711105347, |
|
"learning_rate": 8.7035175879397e-05, |
|
"loss": 1.2245, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.3310175836086273, |
|
"learning_rate": 8.683417085427135e-05, |
|
"loss": 1.2231, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.3311636745929718, |
|
"learning_rate": 8.663316582914574e-05, |
|
"loss": 1.2363, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.3398733139038086, |
|
"learning_rate": 8.64321608040201e-05, |
|
"loss": 1.2684, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.35292455554008484, |
|
"learning_rate": 8.623115577889449e-05, |
|
"loss": 1.2251, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.3688778579235077, |
|
"learning_rate": 8.603015075376884e-05, |
|
"loss": 1.2493, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.3263533413410187, |
|
"learning_rate": 8.582914572864322e-05, |
|
"loss": 1.2479, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.362587034702301, |
|
"learning_rate": 8.562814070351759e-05, |
|
"loss": 1.2296, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.3232894241809845, |
|
"learning_rate": 8.542713567839196e-05, |
|
"loss": 1.1811, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.3491204082965851, |
|
"learning_rate": 8.522613065326634e-05, |
|
"loss": 1.306, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.3507627546787262, |
|
"learning_rate": 8.502512562814071e-05, |
|
"loss": 1.2438, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.34256500005722046, |
|
"learning_rate": 8.482412060301508e-05, |
|
"loss": 1.2765, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 0.333816260099411, |
|
"learning_rate": 8.462311557788946e-05, |
|
"loss": 1.2372, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3247378170490265, |
|
"learning_rate": 8.442211055276383e-05, |
|
"loss": 1.2541, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.31364428997039795, |
|
"learning_rate": 8.42211055276382e-05, |
|
"loss": 1.2676, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3368885815143585, |
|
"learning_rate": 8.402010050251256e-05, |
|
"loss": 1.214, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3350681662559509, |
|
"learning_rate": 8.381909547738695e-05, |
|
"loss": 1.1894, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3448706269264221, |
|
"learning_rate": 8.36180904522613e-05, |
|
"loss": 1.2945, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3268083930015564, |
|
"learning_rate": 8.341708542713568e-05, |
|
"loss": 1.1992, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3421708941459656, |
|
"learning_rate": 8.321608040201005e-05, |
|
"loss": 1.2229, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3358671963214874, |
|
"learning_rate": 8.301507537688443e-05, |
|
"loss": 1.1979, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3192279636859894, |
|
"learning_rate": 8.28140703517588e-05, |
|
"loss": 1.182, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.32525762915611267, |
|
"learning_rate": 8.261306532663317e-05, |
|
"loss": 1.2582, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3301040232181549, |
|
"learning_rate": 8.241206030150754e-05, |
|
"loss": 1.1714, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.33362287282943726, |
|
"learning_rate": 8.22110552763819e-05, |
|
"loss": 1.2263, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.33714091777801514, |
|
"learning_rate": 8.201005025125629e-05, |
|
"loss": 1.2911, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.33390864729881287, |
|
"learning_rate": 8.180904522613065e-05, |
|
"loss": 1.3051, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3238459527492523, |
|
"learning_rate": 8.160804020100504e-05, |
|
"loss": 1.2372, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.35366594791412354, |
|
"learning_rate": 8.14070351758794e-05, |
|
"loss": 1.1972, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.33592501282691956, |
|
"learning_rate": 8.120603015075378e-05, |
|
"loss": 1.2653, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3256325423717499, |
|
"learning_rate": 8.100502512562814e-05, |
|
"loss": 1.2153, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3295387029647827, |
|
"learning_rate": 8.080402010050251e-05, |
|
"loss": 1.2275, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.31947025656700134, |
|
"learning_rate": 8.060301507537689e-05, |
|
"loss": 1.1627, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.3440350890159607, |
|
"learning_rate": 8.040201005025126e-05, |
|
"loss": 1.2488, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 0.31828573346138, |
|
"learning_rate": 8.020100502512563e-05, |
|
"loss": 1.1908, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.31716206669807434, |
|
"learning_rate": 8e-05, |
|
"loss": 1.1661, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.3545120358467102, |
|
"learning_rate": 7.979899497487438e-05, |
|
"loss": 1.1378, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.3279556930065155, |
|
"learning_rate": 7.959798994974875e-05, |
|
"loss": 1.2868, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.318132609128952, |
|
"learning_rate": 7.939698492462313e-05, |
|
"loss": 1.2021, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.3179965913295746, |
|
"learning_rate": 7.91959798994975e-05, |
|
"loss": 1.2386, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.34012511372566223, |
|
"learning_rate": 7.899497487437186e-05, |
|
"loss": 1.2973, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.35141730308532715, |
|
"learning_rate": 7.879396984924623e-05, |
|
"loss": 1.2415, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.341964989900589, |
|
"learning_rate": 7.85929648241206e-05, |
|
"loss": 1.2097, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.34423911571502686, |
|
"learning_rate": 7.839195979899498e-05, |
|
"loss": 1.1406, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.36800771951675415, |
|
"learning_rate": 7.819095477386935e-05, |
|
"loss": 1.2266, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.31842777132987976, |
|
"learning_rate": 7.798994974874372e-05, |
|
"loss": 1.2199, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.3278830349445343, |
|
"learning_rate": 7.77889447236181e-05, |
|
"loss": 1.248, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.3215343654155731, |
|
"learning_rate": 7.758793969849247e-05, |
|
"loss": 1.317, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.3352849781513214, |
|
"learning_rate": 7.738693467336684e-05, |
|
"loss": 1.2171, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.34062597155570984, |
|
"learning_rate": 7.71859296482412e-05, |
|
"loss": 1.2303, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.35442209243774414, |
|
"learning_rate": 7.698492462311559e-05, |
|
"loss": 1.3004, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.3413764238357544, |
|
"learning_rate": 7.678391959798995e-05, |
|
"loss": 1.2718, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.34083688259124756, |
|
"learning_rate": 7.658291457286433e-05, |
|
"loss": 1.2607, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.3345371186733246, |
|
"learning_rate": 7.638190954773869e-05, |
|
"loss": 1.2217, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.34990695118904114, |
|
"learning_rate": 7.618090452261307e-05, |
|
"loss": 1.2614, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.3319753110408783, |
|
"learning_rate": 7.597989949748744e-05, |
|
"loss": 1.2573, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 0.3669947385787964, |
|
"learning_rate": 7.577889447236181e-05, |
|
"loss": 1.2714, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.34474653005599976, |
|
"learning_rate": 7.557788944723618e-05, |
|
"loss": 1.2419, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.35597702860832214, |
|
"learning_rate": 7.537688442211056e-05, |
|
"loss": 1.2893, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3305228352546692, |
|
"learning_rate": 7.517587939698493e-05, |
|
"loss": 1.2201, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.35510796308517456, |
|
"learning_rate": 7.49748743718593e-05, |
|
"loss": 1.2193, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.33328601717948914, |
|
"learning_rate": 7.477386934673368e-05, |
|
"loss": 1.2986, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3292202055454254, |
|
"learning_rate": 7.457286432160805e-05, |
|
"loss": 1.2407, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3294682800769806, |
|
"learning_rate": 7.437185929648241e-05, |
|
"loss": 1.1331, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.33853623270988464, |
|
"learning_rate": 7.417085427135678e-05, |
|
"loss": 1.2286, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3701392412185669, |
|
"learning_rate": 7.396984924623115e-05, |
|
"loss": 1.244, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.32087066769599915, |
|
"learning_rate": 7.376884422110553e-05, |
|
"loss": 1.1956, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3330169916152954, |
|
"learning_rate": 7.35678391959799e-05, |
|
"loss": 1.3153, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3507911264896393, |
|
"learning_rate": 7.336683417085427e-05, |
|
"loss": 1.2131, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.34372231364250183, |
|
"learning_rate": 7.316582914572865e-05, |
|
"loss": 1.1937, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.33789506554603577, |
|
"learning_rate": 7.296482412060302e-05, |
|
"loss": 1.2465, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.33567336201667786, |
|
"learning_rate": 7.276381909547739e-05, |
|
"loss": 1.2036, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3606951832771301, |
|
"learning_rate": 7.256281407035177e-05, |
|
"loss": 1.3034, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3508096933364868, |
|
"learning_rate": 7.236180904522614e-05, |
|
"loss": 1.2744, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3432866930961609, |
|
"learning_rate": 7.21608040201005e-05, |
|
"loss": 1.2345, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3186059892177582, |
|
"learning_rate": 7.195979899497488e-05, |
|
"loss": 1.2293, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.3387812077999115, |
|
"learning_rate": 7.175879396984924e-05, |
|
"loss": 1.2352, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.35298967361450195, |
|
"learning_rate": 7.155778894472363e-05, |
|
"loss": 1.3116, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 0.34189942479133606, |
|
"learning_rate": 7.135678391959799e-05, |
|
"loss": 1.2437, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.32053112983703613, |
|
"learning_rate": 7.115577889447236e-05, |
|
"loss": 1.1889, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.3480307161808014, |
|
"learning_rate": 7.095477386934674e-05, |
|
"loss": 1.2233, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.3634546399116516, |
|
"learning_rate": 7.075376884422111e-05, |
|
"loss": 1.2502, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.310649037361145, |
|
"learning_rate": 7.055276381909548e-05, |
|
"loss": 1.2297, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.3128393292427063, |
|
"learning_rate": 7.035175879396985e-05, |
|
"loss": 1.2366, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.32398995757102966, |
|
"learning_rate": 7.015075376884423e-05, |
|
"loss": 1.255, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.3405938148498535, |
|
"learning_rate": 6.99497487437186e-05, |
|
"loss": 1.1838, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.3226099908351898, |
|
"learning_rate": 6.974874371859297e-05, |
|
"loss": 1.2348, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.3316473364830017, |
|
"learning_rate": 6.954773869346733e-05, |
|
"loss": 1.197, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.34110310673713684, |
|
"learning_rate": 6.93467336683417e-05, |
|
"loss": 1.273, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.34403571486473083, |
|
"learning_rate": 6.914572864321608e-05, |
|
"loss": 1.3238, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.3206476867198944, |
|
"learning_rate": 6.894472361809045e-05, |
|
"loss": 1.263, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.3400155007839203, |
|
"learning_rate": 6.874371859296482e-05, |
|
"loss": 1.2727, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.33599746227264404, |
|
"learning_rate": 6.85427135678392e-05, |
|
"loss": 1.2196, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.32535916566848755, |
|
"learning_rate": 6.834170854271357e-05, |
|
"loss": 1.2313, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.33513665199279785, |
|
"learning_rate": 6.814070351758794e-05, |
|
"loss": 1.2353, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.34317225217819214, |
|
"learning_rate": 6.793969849246232e-05, |
|
"loss": 1.2691, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.32187891006469727, |
|
"learning_rate": 6.773869346733669e-05, |
|
"loss": 1.1432, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.37006068229675293, |
|
"learning_rate": 6.753768844221105e-05, |
|
"loss": 1.2977, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.3436387777328491, |
|
"learning_rate": 6.733668341708544e-05, |
|
"loss": 1.1242, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.35817015171051025, |
|
"learning_rate": 6.71356783919598e-05, |
|
"loss": 1.2531, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 0.3394136130809784, |
|
"learning_rate": 6.693467336683418e-05, |
|
"loss": 1.2435, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.35513705015182495, |
|
"learning_rate": 6.673366834170854e-05, |
|
"loss": 1.1198, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.32795122265815735, |
|
"learning_rate": 6.653266331658293e-05, |
|
"loss": 1.1548, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3325214982032776, |
|
"learning_rate": 6.633165829145729e-05, |
|
"loss": 1.1883, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3546355962753296, |
|
"learning_rate": 6.613065326633166e-05, |
|
"loss": 1.2405, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3289410471916199, |
|
"learning_rate": 6.592964824120603e-05, |
|
"loss": 1.099, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3165920674800873, |
|
"learning_rate": 6.57286432160804e-05, |
|
"loss": 1.212, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3438250422477722, |
|
"learning_rate": 6.552763819095478e-05, |
|
"loss": 1.2021, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3379438817501068, |
|
"learning_rate": 6.532663316582915e-05, |
|
"loss": 1.1801, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3609768748283386, |
|
"learning_rate": 6.512562814070352e-05, |
|
"loss": 1.3406, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3512379229068756, |
|
"learning_rate": 6.492462311557788e-05, |
|
"loss": 1.2982, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3456796407699585, |
|
"learning_rate": 6.472361809045227e-05, |
|
"loss": 1.1908, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3362092971801758, |
|
"learning_rate": 6.452261306532663e-05, |
|
"loss": 1.3286, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3278902769088745, |
|
"learning_rate": 6.4321608040201e-05, |
|
"loss": 1.1441, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.350449800491333, |
|
"learning_rate": 6.412060301507538e-05, |
|
"loss": 1.2131, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3359929323196411, |
|
"learning_rate": 6.391959798994975e-05, |
|
"loss": 1.285, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3683655261993408, |
|
"learning_rate": 6.371859296482412e-05, |
|
"loss": 1.2161, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3192093074321747, |
|
"learning_rate": 6.35175879396985e-05, |
|
"loss": 1.1578, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3408317565917969, |
|
"learning_rate": 6.331658291457287e-05, |
|
"loss": 1.1936, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3410519063472748, |
|
"learning_rate": 6.311557788944724e-05, |
|
"loss": 1.2473, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.3447181284427643, |
|
"learning_rate": 6.291457286432161e-05, |
|
"loss": 1.247, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.32790473103523254, |
|
"learning_rate": 6.271356783919599e-05, |
|
"loss": 1.2803, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 0.34671932458877563, |
|
"learning_rate": 6.251256281407035e-05, |
|
"loss": 1.2403, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.34242817759513855, |
|
"learning_rate": 6.231155778894473e-05, |
|
"loss": 1.2525, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.33679676055908203, |
|
"learning_rate": 6.211055276381909e-05, |
|
"loss": 1.2433, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.35853180289268494, |
|
"learning_rate": 6.190954773869348e-05, |
|
"loss": 1.2014, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3501017391681671, |
|
"learning_rate": 6.170854271356784e-05, |
|
"loss": 1.1814, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3347374200820923, |
|
"learning_rate": 6.150753768844222e-05, |
|
"loss": 1.2137, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.33397722244262695, |
|
"learning_rate": 6.130653266331658e-05, |
|
"loss": 1.1753, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.33950281143188477, |
|
"learning_rate": 6.110552763819096e-05, |
|
"loss": 1.2455, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3363599479198456, |
|
"learning_rate": 6.090452261306533e-05, |
|
"loss": 1.2087, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3455164134502411, |
|
"learning_rate": 6.070351758793971e-05, |
|
"loss": 1.2711, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3824455440044403, |
|
"learning_rate": 6.0502512562814076e-05, |
|
"loss": 1.2345, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3414604961872101, |
|
"learning_rate": 6.030150753768844e-05, |
|
"loss": 1.2464, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3410933017730713, |
|
"learning_rate": 6.0100502512562815e-05, |
|
"loss": 1.2505, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3278619945049286, |
|
"learning_rate": 5.989949748743718e-05, |
|
"loss": 1.2373, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.34170377254486084, |
|
"learning_rate": 5.969849246231156e-05, |
|
"loss": 1.2528, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3370874226093292, |
|
"learning_rate": 5.949748743718593e-05, |
|
"loss": 1.2484, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3313332796096802, |
|
"learning_rate": 5.929648241206031e-05, |
|
"loss": 1.2382, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3433292508125305, |
|
"learning_rate": 5.909547738693467e-05, |
|
"loss": 1.2213, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.3642396926879883, |
|
"learning_rate": 5.889447236180905e-05, |
|
"loss": 1.1992, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.33468329906463623, |
|
"learning_rate": 5.869346733668342e-05, |
|
"loss": 1.2648, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.34776145219802856, |
|
"learning_rate": 5.849246231155779e-05, |
|
"loss": 1.2781, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 0.34735772013664246, |
|
"learning_rate": 5.829145728643216e-05, |
|
"loss": 1.3148, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.30450117588043213, |
|
"learning_rate": 5.809045226130654e-05, |
|
"loss": 1.2433, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.34202051162719727, |
|
"learning_rate": 5.7889447236180904e-05, |
|
"loss": 1.3233, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3289657235145569, |
|
"learning_rate": 5.7688442211055284e-05, |
|
"loss": 1.1637, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3224788308143616, |
|
"learning_rate": 5.748743718592965e-05, |
|
"loss": 1.2004, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.34642502665519714, |
|
"learning_rate": 5.728643216080403e-05, |
|
"loss": 1.2703, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3221339285373688, |
|
"learning_rate": 5.7085427135678396e-05, |
|
"loss": 1.2686, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3153306245803833, |
|
"learning_rate": 5.688442211055277e-05, |
|
"loss": 1.1924, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3397902846336365, |
|
"learning_rate": 5.6683417085427135e-05, |
|
"loss": 1.2925, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.31026211380958557, |
|
"learning_rate": 5.6482412060301515e-05, |
|
"loss": 1.215, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.36266523599624634, |
|
"learning_rate": 5.628140703517588e-05, |
|
"loss": 1.1878, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3195384740829468, |
|
"learning_rate": 5.608040201005026e-05, |
|
"loss": 1.2683, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3288376033306122, |
|
"learning_rate": 5.587939698492463e-05, |
|
"loss": 1.3016, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3408261835575104, |
|
"learning_rate": 5.567839195979899e-05, |
|
"loss": 1.2068, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.35029324889183044, |
|
"learning_rate": 5.547738693467337e-05, |
|
"loss": 1.1991, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3637566566467285, |
|
"learning_rate": 5.527638190954774e-05, |
|
"loss": 1.2342, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3418453633785248, |
|
"learning_rate": 5.507537688442211e-05, |
|
"loss": 1.3175, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.35708463191986084, |
|
"learning_rate": 5.487437185929648e-05, |
|
"loss": 1.2073, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.37137219309806824, |
|
"learning_rate": 5.467336683417086e-05, |
|
"loss": 1.2347, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3417325019836426, |
|
"learning_rate": 5.4472361809045224e-05, |
|
"loss": 1.1796, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3448787331581116, |
|
"learning_rate": 5.4271356783919604e-05, |
|
"loss": 1.2761, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3505607843399048, |
|
"learning_rate": 5.407035175879397e-05, |
|
"loss": 1.2748, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 0.3447292149066925, |
|
"learning_rate": 5.386934673366835e-05, |
|
"loss": 1.2081, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.36144357919692993, |
|
"learning_rate": 5.3668341708542716e-05, |
|
"loss": 1.2111, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.34409165382385254, |
|
"learning_rate": 5.346733668341709e-05, |
|
"loss": 1.232, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.34955504536628723, |
|
"learning_rate": 5.3266331658291455e-05, |
|
"loss": 1.2325, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.38873958587646484, |
|
"learning_rate": 5.3065326633165835e-05, |
|
"loss": 1.253, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.3274456560611725, |
|
"learning_rate": 5.28643216080402e-05, |
|
"loss": 1.2484, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.35621777176856995, |
|
"learning_rate": 5.266331658291458e-05, |
|
"loss": 1.293, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.33436283469200134, |
|
"learning_rate": 5.246231155778895e-05, |
|
"loss": 1.2933, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.35243648290634155, |
|
"learning_rate": 5.226130653266332e-05, |
|
"loss": 1.2969, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.34483468532562256, |
|
"learning_rate": 5.206030150753769e-05, |
|
"loss": 1.2491, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.32675257325172424, |
|
"learning_rate": 5.1859296482412066e-05, |
|
"loss": 1.2094, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.34875720739364624, |
|
"learning_rate": 5.165829145728643e-05, |
|
"loss": 1.2774, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.34670987725257874, |
|
"learning_rate": 5.145728643216081e-05, |
|
"loss": 1.2014, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.34661754965782166, |
|
"learning_rate": 5.125628140703518e-05, |
|
"loss": 1.3128, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.3611772358417511, |
|
"learning_rate": 5.1055276381909544e-05, |
|
"loss": 1.1779, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.3536320626735687, |
|
"learning_rate": 5.0854271356783924e-05, |
|
"loss": 1.2185, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.3396211564540863, |
|
"learning_rate": 5.065326633165829e-05, |
|
"loss": 1.2267, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.3299081325531006, |
|
"learning_rate": 5.045226130653266e-05, |
|
"loss": 1.234, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.34536656737327576, |
|
"learning_rate": 5.0251256281407036e-05, |
|
"loss": 1.1687, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.3260354697704315, |
|
"learning_rate": 5.005025125628141e-05, |
|
"loss": 1.2668, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.3403102457523346, |
|
"learning_rate": 4.984924623115578e-05, |
|
"loss": 1.2686, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.3585387170314789, |
|
"learning_rate": 4.9648241206030155e-05, |
|
"loss": 1.2281, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.3247324824333191, |
|
"learning_rate": 4.944723618090453e-05, |
|
"loss": 1.2756, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.36049649119377136, |
|
"learning_rate": 4.92462311557789e-05, |
|
"loss": 1.3494, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.32784217596054077, |
|
"learning_rate": 4.9045226130653274e-05, |
|
"loss": 1.2805, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.33943304419517517, |
|
"learning_rate": 4.884422110552764e-05, |
|
"loss": 1.2258, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.3395639657974243, |
|
"learning_rate": 4.864321608040201e-05, |
|
"loss": 1.1176, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.32322150468826294, |
|
"learning_rate": 4.844221105527638e-05, |
|
"loss": 1.1269, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.32815924286842346, |
|
"learning_rate": 4.824120603015075e-05, |
|
"loss": 1.334, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.33146142959594727, |
|
"learning_rate": 4.8040201005025125e-05, |
|
"loss": 1.1741, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.3309881389141083, |
|
"learning_rate": 4.78391959798995e-05, |
|
"loss": 1.2565, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.36289462447166443, |
|
"learning_rate": 4.763819095477387e-05, |
|
"loss": 1.1769, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.31728067994117737, |
|
"learning_rate": 4.7437185929648244e-05, |
|
"loss": 1.1964, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.3471260964870453, |
|
"learning_rate": 4.723618090452262e-05, |
|
"loss": 1.2827, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.3100842535495758, |
|
"learning_rate": 4.703517587939698e-05, |
|
"loss": 1.1399, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.32581594586372375, |
|
"learning_rate": 4.6834170854271356e-05, |
|
"loss": 1.2041, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.33643782138824463, |
|
"learning_rate": 4.663316582914573e-05, |
|
"loss": 1.2493, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.34091752767562866, |
|
"learning_rate": 4.64321608040201e-05, |
|
"loss": 1.2265, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.3271618187427521, |
|
"learning_rate": 4.6231155778894475e-05, |
|
"loss": 1.183, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.3240033686161041, |
|
"learning_rate": 4.603015075376885e-05, |
|
"loss": 1.2065, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.3427727520465851, |
|
"learning_rate": 4.582914572864322e-05, |
|
"loss": 1.1634, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.31667765974998474, |
|
"learning_rate": 4.5628140703517594e-05, |
|
"loss": 1.2091, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.326895534992218, |
|
"learning_rate": 4.542713567839196e-05, |
|
"loss": 1.2075, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.32184022665023804, |
|
"learning_rate": 4.522613065326633e-05, |
|
"loss": 1.206, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 0.3442372679710388, |
|
"learning_rate": 4.5025125628140706e-05, |
|
"loss": 1.2528, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.3295699656009674, |
|
"learning_rate": 4.482412060301508e-05, |
|
"loss": 1.2723, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.33833664655685425, |
|
"learning_rate": 4.462311557788945e-05, |
|
"loss": 1.2277, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.33365553617477417, |
|
"learning_rate": 4.4422110552763825e-05, |
|
"loss": 1.2101, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.340986967086792, |
|
"learning_rate": 4.42211055276382e-05, |
|
"loss": 1.2342, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.3409508764743805, |
|
"learning_rate": 4.4020100502512564e-05, |
|
"loss": 1.2387, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.33809345960617065, |
|
"learning_rate": 4.381909547738694e-05, |
|
"loss": 1.2864, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.34557396173477173, |
|
"learning_rate": 4.3618090452261303e-05, |
|
"loss": 1.1534, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.35186269879341125, |
|
"learning_rate": 4.3417085427135676e-05, |
|
"loss": 1.2989, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.3429466187953949, |
|
"learning_rate": 4.321608040201005e-05, |
|
"loss": 1.2207, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.3693259060382843, |
|
"learning_rate": 4.301507537688442e-05, |
|
"loss": 1.2331, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.320137083530426, |
|
"learning_rate": 4.2814070351758795e-05, |
|
"loss": 1.2518, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.34945398569107056, |
|
"learning_rate": 4.261306532663317e-05, |
|
"loss": 1.2906, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.32532766461372375, |
|
"learning_rate": 4.241206030150754e-05, |
|
"loss": 1.1813, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.34059426188468933, |
|
"learning_rate": 4.2211055276381914e-05, |
|
"loss": 1.2145, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.3278336226940155, |
|
"learning_rate": 4.201005025125628e-05, |
|
"loss": 1.2438, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.3421019911766052, |
|
"learning_rate": 4.180904522613065e-05, |
|
"loss": 1.0761, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.34605157375335693, |
|
"learning_rate": 4.1608040201005026e-05, |
|
"loss": 1.2483, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.36613568663597107, |
|
"learning_rate": 4.14070351758794e-05, |
|
"loss": 1.313, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.33103081583976746, |
|
"learning_rate": 4.120603015075377e-05, |
|
"loss": 1.2694, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.3223966956138611, |
|
"learning_rate": 4.1005025125628145e-05, |
|
"loss": 1.3503, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.3374450206756592, |
|
"learning_rate": 4.080402010050252e-05, |
|
"loss": 1.1971, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 0.33531078696250916, |
|
"learning_rate": 4.060301507537689e-05, |
|
"loss": 1.129, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.32305672764778137, |
|
"learning_rate": 4.040201005025126e-05, |
|
"loss": 1.2788, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.3243289887905121, |
|
"learning_rate": 4.020100502512563e-05, |
|
"loss": 1.2627, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.34850698709487915, |
|
"learning_rate": 4e-05, |
|
"loss": 1.2736, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.32808718085289, |
|
"learning_rate": 3.9798994974874376e-05, |
|
"loss": 1.1288, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.32883942127227783, |
|
"learning_rate": 3.959798994974875e-05, |
|
"loss": 1.2443, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.31969812512397766, |
|
"learning_rate": 3.9396984924623115e-05, |
|
"loss": 1.1762, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.339046835899353, |
|
"learning_rate": 3.919597989949749e-05, |
|
"loss": 1.2671, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.33851614594459534, |
|
"learning_rate": 3.899497487437186e-05, |
|
"loss": 1.2257, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.3604491055011749, |
|
"learning_rate": 3.8793969849246234e-05, |
|
"loss": 1.1635, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.3570076823234558, |
|
"learning_rate": 3.85929648241206e-05, |
|
"loss": 1.2682, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.32678258419036865, |
|
"learning_rate": 3.8391959798994973e-05, |
|
"loss": 1.2212, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.2903866767883301, |
|
"learning_rate": 3.8190954773869346e-05, |
|
"loss": 1.115, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.33241915702819824, |
|
"learning_rate": 3.798994974874372e-05, |
|
"loss": 1.2963, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.36244097352027893, |
|
"learning_rate": 3.778894472361809e-05, |
|
"loss": 1.346, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.3391794264316559, |
|
"learning_rate": 3.7587939698492465e-05, |
|
"loss": 1.2304, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.3356548249721527, |
|
"learning_rate": 3.738693467336684e-05, |
|
"loss": 1.2342, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.33378085494041443, |
|
"learning_rate": 3.7185929648241204e-05, |
|
"loss": 1.2352, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.35757875442504883, |
|
"learning_rate": 3.698492462311558e-05, |
|
"loss": 1.2991, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.30651041865348816, |
|
"learning_rate": 3.678391959798995e-05, |
|
"loss": 1.2426, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.34417861700057983, |
|
"learning_rate": 3.658291457286432e-05, |
|
"loss": 1.3361, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.31958290934562683, |
|
"learning_rate": 3.6381909547738696e-05, |
|
"loss": 1.2257, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 0.35524600744247437, |
|
"learning_rate": 3.618090452261307e-05, |
|
"loss": 1.3071, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3114381432533264, |
|
"learning_rate": 3.597989949748744e-05, |
|
"loss": 1.1885, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3464575409889221, |
|
"learning_rate": 3.5778894472361815e-05, |
|
"loss": 1.2519, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3141149878501892, |
|
"learning_rate": 3.557788944723618e-05, |
|
"loss": 1.2193, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3367476463317871, |
|
"learning_rate": 3.5376884422110554e-05, |
|
"loss": 1.2816, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.34699156880378723, |
|
"learning_rate": 3.517587939698493e-05, |
|
"loss": 1.3171, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3360113501548767, |
|
"learning_rate": 3.49748743718593e-05, |
|
"loss": 1.1428, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.35045570135116577, |
|
"learning_rate": 3.4773869346733667e-05, |
|
"loss": 1.2928, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3526917099952698, |
|
"learning_rate": 3.457286432160804e-05, |
|
"loss": 1.1539, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3474697768688202, |
|
"learning_rate": 3.437185929648241e-05, |
|
"loss": 1.2148, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.410773366689682, |
|
"learning_rate": 3.4170854271356785e-05, |
|
"loss": 1.1966, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.33725374937057495, |
|
"learning_rate": 3.396984924623116e-05, |
|
"loss": 1.2784, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3344865143299103, |
|
"learning_rate": 3.3768844221105525e-05, |
|
"loss": 1.1797, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3316870331764221, |
|
"learning_rate": 3.35678391959799e-05, |
|
"loss": 1.2463, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3409169912338257, |
|
"learning_rate": 3.336683417085427e-05, |
|
"loss": 1.4216, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.34283575415611267, |
|
"learning_rate": 3.3165829145728643e-05, |
|
"loss": 1.2293, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3508949279785156, |
|
"learning_rate": 3.2964824120603016e-05, |
|
"loss": 1.3677, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.34627601504325867, |
|
"learning_rate": 3.276381909547739e-05, |
|
"loss": 1.1889, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3398142457008362, |
|
"learning_rate": 3.256281407035176e-05, |
|
"loss": 1.1853, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.34818077087402344, |
|
"learning_rate": 3.2361809045226135e-05, |
|
"loss": 1.2272, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.34182122349739075, |
|
"learning_rate": 3.21608040201005e-05, |
|
"loss": 1.236, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.33442428708076477, |
|
"learning_rate": 3.1959798994974875e-05, |
|
"loss": 1.2327, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 0.3499019742012024, |
|
"learning_rate": 3.175879396984925e-05, |
|
"loss": 1.2771, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.3252609074115753, |
|
"learning_rate": 3.155778894472362e-05, |
|
"loss": 1.2988, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.3448849022388458, |
|
"learning_rate": 3.1356783919597993e-05, |
|
"loss": 1.1551, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.34106680750846863, |
|
"learning_rate": 3.1155778894472366e-05, |
|
"loss": 1.2141, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.3367229104042053, |
|
"learning_rate": 3.095477386934674e-05, |
|
"loss": 1.2251, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.341509073972702, |
|
"learning_rate": 3.075376884422111e-05, |
|
"loss": 1.2391, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.3711039125919342, |
|
"learning_rate": 3.055276381909548e-05, |
|
"loss": 1.2643, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.36982688307762146, |
|
"learning_rate": 3.0351758793969855e-05, |
|
"loss": 1.2131, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.3039700984954834, |
|
"learning_rate": 3.015075376884422e-05, |
|
"loss": 1.1973, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.33018019795417786, |
|
"learning_rate": 2.994974874371859e-05, |
|
"loss": 1.2517, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.33908405900001526, |
|
"learning_rate": 2.9748743718592964e-05, |
|
"loss": 1.1586, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.34731873869895935, |
|
"learning_rate": 2.9547738693467337e-05, |
|
"loss": 1.2373, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.3319443464279175, |
|
"learning_rate": 2.934673366834171e-05, |
|
"loss": 1.2585, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.33522048592567444, |
|
"learning_rate": 2.914572864321608e-05, |
|
"loss": 1.2501, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.3560466766357422, |
|
"learning_rate": 2.8944723618090452e-05, |
|
"loss": 1.1548, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.3441299796104431, |
|
"learning_rate": 2.8743718592964825e-05, |
|
"loss": 1.3083, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.32371416687965393, |
|
"learning_rate": 2.8542713567839198e-05, |
|
"loss": 1.3437, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.34365978837013245, |
|
"learning_rate": 2.8341708542713568e-05, |
|
"loss": 1.2776, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.34733307361602783, |
|
"learning_rate": 2.814070351758794e-05, |
|
"loss": 1.261, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.3306278884410858, |
|
"learning_rate": 2.7939698492462314e-05, |
|
"loss": 1.1925, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.3588391840457916, |
|
"learning_rate": 2.7738693467336686e-05, |
|
"loss": 1.2524, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.32391244173049927, |
|
"learning_rate": 2.7537688442211056e-05, |
|
"loss": 1.2479, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 0.37597063183784485, |
|
"learning_rate": 2.733668341708543e-05, |
|
"loss": 1.2458, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.32394328713417053, |
|
"learning_rate": 2.7135678391959802e-05, |
|
"loss": 1.2868, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.3270561993122101, |
|
"learning_rate": 2.6934673366834175e-05, |
|
"loss": 1.1915, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.36114001274108887, |
|
"learning_rate": 2.6733668341708545e-05, |
|
"loss": 1.1831, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.33239543437957764, |
|
"learning_rate": 2.6532663316582917e-05, |
|
"loss": 1.2756, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.3513084352016449, |
|
"learning_rate": 2.633165829145729e-05, |
|
"loss": 1.1912, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.3425911068916321, |
|
"learning_rate": 2.613065326633166e-05, |
|
"loss": 1.1919, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.35736554861068726, |
|
"learning_rate": 2.5929648241206033e-05, |
|
"loss": 1.3152, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.33599066734313965, |
|
"learning_rate": 2.5728643216080406e-05, |
|
"loss": 1.1916, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.33452367782592773, |
|
"learning_rate": 2.5527638190954772e-05, |
|
"loss": 1.1698, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.3205559253692627, |
|
"learning_rate": 2.5326633165829145e-05, |
|
"loss": 1.2197, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.337720662355423, |
|
"learning_rate": 2.5125628140703518e-05, |
|
"loss": 1.2132, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.4287751615047455, |
|
"learning_rate": 2.492462311557789e-05, |
|
"loss": 1.2187, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.33593013882637024, |
|
"learning_rate": 2.4723618090452264e-05, |
|
"loss": 1.293, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.33557310700416565, |
|
"learning_rate": 2.4522613065326637e-05, |
|
"loss": 1.2706, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.33586448431015015, |
|
"learning_rate": 2.4321608040201007e-05, |
|
"loss": 1.1899, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.3143567740917206, |
|
"learning_rate": 2.4120603015075376e-05, |
|
"loss": 1.2161, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.33706820011138916, |
|
"learning_rate": 2.391959798994975e-05, |
|
"loss": 1.2193, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.31664061546325684, |
|
"learning_rate": 2.3718592964824122e-05, |
|
"loss": 1.174, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.33685946464538574, |
|
"learning_rate": 2.351758793969849e-05, |
|
"loss": 1.172, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.33722755312919617, |
|
"learning_rate": 2.3316582914572865e-05, |
|
"loss": 1.2743, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 0.32739368081092834, |
|
"learning_rate": 2.3115577889447238e-05, |
|
"loss": 1.2713, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.34132125973701477, |
|
"learning_rate": 2.291457286432161e-05, |
|
"loss": 1.3329, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.3514713943004608, |
|
"learning_rate": 2.271356783919598e-05, |
|
"loss": 1.2198, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.3208943009376526, |
|
"learning_rate": 2.2512562814070353e-05, |
|
"loss": 1.2092, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.32605454325675964, |
|
"learning_rate": 2.2311557788944726e-05, |
|
"loss": 1.2181, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.3214203119277954, |
|
"learning_rate": 2.21105527638191e-05, |
|
"loss": 1.068, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.3456685543060303, |
|
"learning_rate": 2.190954773869347e-05, |
|
"loss": 1.2062, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.33764395117759705, |
|
"learning_rate": 2.1708542713567838e-05, |
|
"loss": 1.2975, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.3418406844139099, |
|
"learning_rate": 2.150753768844221e-05, |
|
"loss": 1.3145, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.32421067357063293, |
|
"learning_rate": 2.1306532663316584e-05, |
|
"loss": 1.2247, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.32601818442344666, |
|
"learning_rate": 2.1105527638190957e-05, |
|
"loss": 1.2817, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.3486829698085785, |
|
"learning_rate": 2.0904522613065327e-05, |
|
"loss": 1.215, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.31031641364097595, |
|
"learning_rate": 2.07035175879397e-05, |
|
"loss": 1.266, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.36151039600372314, |
|
"learning_rate": 2.0502512562814073e-05, |
|
"loss": 1.3193, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.33280953764915466, |
|
"learning_rate": 2.0301507537688446e-05, |
|
"loss": 1.2555, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.34150639176368713, |
|
"learning_rate": 2.0100502512562815e-05, |
|
"loss": 1.2865, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.3205152153968811, |
|
"learning_rate": 1.9899497487437188e-05, |
|
"loss": 1.1683, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.32988911867141724, |
|
"learning_rate": 1.9698492462311558e-05, |
|
"loss": 1.2357, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.31103867292404175, |
|
"learning_rate": 1.949748743718593e-05, |
|
"loss": 1.1425, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.3254905939102173, |
|
"learning_rate": 1.92964824120603e-05, |
|
"loss": 1.2725, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.34088942408561707, |
|
"learning_rate": 1.9095477386934673e-05, |
|
"loss": 1.2719, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.331760048866272, |
|
"learning_rate": 1.8894472361809046e-05, |
|
"loss": 1.2883, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 0.32167789340019226, |
|
"learning_rate": 1.869346733668342e-05, |
|
"loss": 1.1878, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.31863799691200256, |
|
"learning_rate": 1.849246231155779e-05, |
|
"loss": 1.2426, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.31977197527885437, |
|
"learning_rate": 1.829145728643216e-05, |
|
"loss": 1.2317, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.32206329703330994, |
|
"learning_rate": 1.8090452261306535e-05, |
|
"loss": 1.2461, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.3464859127998352, |
|
"learning_rate": 1.7889447236180908e-05, |
|
"loss": 1.2019, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.3386858105659485, |
|
"learning_rate": 1.7688442211055277e-05, |
|
"loss": 1.1637, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.33315742015838623, |
|
"learning_rate": 1.748743718592965e-05, |
|
"loss": 1.2268, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.3428650200366974, |
|
"learning_rate": 1.728643216080402e-05, |
|
"loss": 1.1709, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.32808947563171387, |
|
"learning_rate": 1.7085427135678393e-05, |
|
"loss": 1.246, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.3213931918144226, |
|
"learning_rate": 1.6884422110552762e-05, |
|
"loss": 1.1798, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.32325300574302673, |
|
"learning_rate": 1.6683417085427135e-05, |
|
"loss": 1.2045, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.34444937109947205, |
|
"learning_rate": 1.6482412060301508e-05, |
|
"loss": 1.1657, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.34642207622528076, |
|
"learning_rate": 1.628140703517588e-05, |
|
"loss": 1.2589, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.321769654750824, |
|
"learning_rate": 1.608040201005025e-05, |
|
"loss": 1.2896, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.32652974128723145, |
|
"learning_rate": 1.5879396984924624e-05, |
|
"loss": 1.239, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.32902684807777405, |
|
"learning_rate": 1.5678391959798997e-05, |
|
"loss": 1.1761, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.3562852442264557, |
|
"learning_rate": 1.547738693467337e-05, |
|
"loss": 1.2119, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.32487592101097107, |
|
"learning_rate": 1.527638190954774e-05, |
|
"loss": 1.2713, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.3527469038963318, |
|
"learning_rate": 1.507537688442211e-05, |
|
"loss": 1.2803, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.32758113741874695, |
|
"learning_rate": 1.4874371859296482e-05, |
|
"loss": 1.239, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.3581792116165161, |
|
"learning_rate": 1.4673366834170855e-05, |
|
"loss": 1.1578, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.3269941806793213, |
|
"learning_rate": 1.4472361809045226e-05, |
|
"loss": 1.2673, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 0.33714571595191956, |
|
"learning_rate": 1.4271356783919599e-05, |
|
"loss": 1.1647, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3444203734397888, |
|
"learning_rate": 1.407035175879397e-05, |
|
"loss": 1.2443, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3266572952270508, |
|
"learning_rate": 1.3869346733668343e-05, |
|
"loss": 1.2083, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.33283984661102295, |
|
"learning_rate": 1.3668341708542715e-05, |
|
"loss": 1.2936, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.31751376390457153, |
|
"learning_rate": 1.3467336683417087e-05, |
|
"loss": 1.2752, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.31209132075309753, |
|
"learning_rate": 1.3266331658291459e-05, |
|
"loss": 1.2414, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.33048173785209656, |
|
"learning_rate": 1.306532663316583e-05, |
|
"loss": 1.2501, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.35573264956474304, |
|
"learning_rate": 1.2864321608040203e-05, |
|
"loss": 1.2298, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.34101277589797974, |
|
"learning_rate": 1.2663316582914573e-05, |
|
"loss": 1.2113, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3265811502933502, |
|
"learning_rate": 1.2462311557788946e-05, |
|
"loss": 1.2443, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3247004449367523, |
|
"learning_rate": 1.2261306532663318e-05, |
|
"loss": 1.2098, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3484182357788086, |
|
"learning_rate": 1.2060301507537688e-05, |
|
"loss": 1.2731, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3221377432346344, |
|
"learning_rate": 1.1859296482412061e-05, |
|
"loss": 1.1475, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.2977326512336731, |
|
"learning_rate": 1.1658291457286432e-05, |
|
"loss": 1.2007, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3372350335121155, |
|
"learning_rate": 1.1457286432160805e-05, |
|
"loss": 1.3172, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.33364465832710266, |
|
"learning_rate": 1.1256281407035177e-05, |
|
"loss": 1.1503, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.34927648305892944, |
|
"learning_rate": 1.105527638190955e-05, |
|
"loss": 1.2117, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3644360601902008, |
|
"learning_rate": 1.0854271356783919e-05, |
|
"loss": 1.2228, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.32903411984443665, |
|
"learning_rate": 1.0653266331658292e-05, |
|
"loss": 1.1774, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.31273844838142395, |
|
"learning_rate": 1.0452261306532663e-05, |
|
"loss": 1.2441, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3472016453742981, |
|
"learning_rate": 1.0251256281407036e-05, |
|
"loss": 1.2373, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.33334019780158997, |
|
"learning_rate": 1.0050251256281408e-05, |
|
"loss": 1.2008, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 0.3392302393913269, |
|
"learning_rate": 9.849246231155779e-06, |
|
"loss": 1.2857, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.3528233468532562, |
|
"learning_rate": 9.64824120603015e-06, |
|
"loss": 1.2599, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.32364121079444885, |
|
"learning_rate": 9.447236180904523e-06, |
|
"loss": 1.215, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.3271343410015106, |
|
"learning_rate": 9.246231155778894e-06, |
|
"loss": 1.2212, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.3110508620738983, |
|
"learning_rate": 9.045226130653267e-06, |
|
"loss": 1.2951, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.3501865267753601, |
|
"learning_rate": 8.844221105527639e-06, |
|
"loss": 1.2195, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.35401347279548645, |
|
"learning_rate": 8.64321608040201e-06, |
|
"loss": 1.1406, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.3467148542404175, |
|
"learning_rate": 8.442211055276381e-06, |
|
"loss": 1.2076, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.5495727062225342, |
|
"learning_rate": 8.241206030150754e-06, |
|
"loss": 1.2051, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.34182611107826233, |
|
"learning_rate": 8.040201005025125e-06, |
|
"loss": 1.2087, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.33628034591674805, |
|
"learning_rate": 7.839195979899498e-06, |
|
"loss": 1.1792, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.3278791904449463, |
|
"learning_rate": 7.63819095477387e-06, |
|
"loss": 1.2596, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.35457128286361694, |
|
"learning_rate": 7.437185929648241e-06, |
|
"loss": 1.2581, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.3405055105686188, |
|
"learning_rate": 7.236180904522613e-06, |
|
"loss": 1.2284, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.3680908679962158, |
|
"learning_rate": 7.035175879396985e-06, |
|
"loss": 1.2552, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.34292829036712646, |
|
"learning_rate": 6.834170854271357e-06, |
|
"loss": 1.1868, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.35092589259147644, |
|
"learning_rate": 6.633165829145729e-06, |
|
"loss": 1.2687, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.34995222091674805, |
|
"learning_rate": 6.4321608040201015e-06, |
|
"loss": 1.221, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.32289987802505493, |
|
"learning_rate": 6.231155778894473e-06, |
|
"loss": 1.2722, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.3334210515022278, |
|
"learning_rate": 6.030150753768844e-06, |
|
"loss": 1.2668, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.3216976523399353, |
|
"learning_rate": 5.829145728643216e-06, |
|
"loss": 1.3364, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.33897489309310913, |
|
"learning_rate": 5.628140703517588e-06, |
|
"loss": 1.1858, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 0.33176472783088684, |
|
"learning_rate": 5.4271356783919595e-06, |
|
"loss": 1.2104, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.32903414964675903, |
|
"learning_rate": 5.226130653266332e-06, |
|
"loss": 1.2193, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.3290814459323883, |
|
"learning_rate": 5.025125628140704e-06, |
|
"loss": 1.2564, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.32403564453125, |
|
"learning_rate": 4.824120603015075e-06, |
|
"loss": 1.1738, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.3256121277809143, |
|
"learning_rate": 4.623115577889447e-06, |
|
"loss": 1.2547, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.3145628571510315, |
|
"learning_rate": 4.422110552763819e-06, |
|
"loss": 1.196, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.3379940092563629, |
|
"learning_rate": 4.2211055276381906e-06, |
|
"loss": 1.3159, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.33039796352386475, |
|
"learning_rate": 4.020100502512563e-06, |
|
"loss": 1.1869, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.3251807391643524, |
|
"learning_rate": 3.819095477386935e-06, |
|
"loss": 1.2726, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.33046984672546387, |
|
"learning_rate": 3.6180904522613065e-06, |
|
"loss": 1.2509, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.32313692569732666, |
|
"learning_rate": 3.4170854271356786e-06, |
|
"loss": 1.308, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.34376001358032227, |
|
"learning_rate": 3.2160804020100507e-06, |
|
"loss": 1.3215, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.33600521087646484, |
|
"learning_rate": 3.015075376884422e-06, |
|
"loss": 1.2346, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.33546069264411926, |
|
"learning_rate": 2.814070351758794e-06, |
|
"loss": 1.2924, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.3507489264011383, |
|
"learning_rate": 2.613065326633166e-06, |
|
"loss": 1.2574, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.3476717472076416, |
|
"learning_rate": 2.4120603015075375e-06, |
|
"loss": 1.2874, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.3193936347961426, |
|
"learning_rate": 2.2110552763819096e-06, |
|
"loss": 1.2534, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.35533928871154785, |
|
"learning_rate": 2.0100502512562813e-06, |
|
"loss": 1.2738, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.33337244391441345, |
|
"learning_rate": 1.8090452261306533e-06, |
|
"loss": 1.261, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.34682735800743103, |
|
"learning_rate": 1.6080402010050254e-06, |
|
"loss": 1.248, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.33268532156944275, |
|
"learning_rate": 1.407035175879397e-06, |
|
"loss": 1.1575, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.3305032253265381, |
|
"learning_rate": 1.2060301507537688e-06, |
|
"loss": 1.2118, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 0.3230968117713928, |
|
"learning_rate": 1.0050251256281407e-06, |
|
"loss": 1.1977, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.3403209447860718, |
|
"learning_rate": 8.040201005025127e-07, |
|
"loss": 1.2415, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.31072068214416504, |
|
"learning_rate": 6.030150753768844e-07, |
|
"loss": 1.1967, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.3351586163043976, |
|
"learning_rate": 4.0201005025125634e-07, |
|
"loss": 1.208, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.33677101135253906, |
|
"learning_rate": 2.0100502512562817e-07, |
|
"loss": 1.2926, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 0.332905113697052, |
|
"learning_rate": 0.0, |
|
"loss": 1.1932, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 200, |
|
"total_flos": 4.636526208766771e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|