{
  "best_metric": 6.867819786071777,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.023317678741502184,
  "eval_steps": 25,
  "global_step": 142,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00016420900522184638,
      "grad_norm": 0.18168503046035767,
      "learning_rate": 2e-05,
      "loss": 6.9428,
      "step": 1
    },
    {
      "epoch": 0.00016420900522184638,
      "eval_loss": 6.944418907165527,
      "eval_runtime": 0.6891,
      "eval_samples_per_second": 72.556,
      "eval_steps_per_second": 13.06,
      "step": 1
    },
    {
      "epoch": 0.00032841801044369276,
      "grad_norm": 0.1871073693037033,
      "learning_rate": 4e-05,
      "loss": 6.9417,
      "step": 2
    },
    {
      "epoch": 0.0004926270156655391,
      "grad_norm": 0.18547073006629944,
      "learning_rate": 6e-05,
      "loss": 6.9422,
      "step": 3
    },
    {
      "epoch": 0.0006568360208873855,
      "grad_norm": 0.19758914411067963,
      "learning_rate": 8e-05,
      "loss": 6.9364,
      "step": 4
    },
    {
      "epoch": 0.0008210450261092318,
      "grad_norm": 0.1958891600370407,
      "learning_rate": 0.0001,
      "loss": 6.9404,
      "step": 5
    },
    {
      "epoch": 0.0009852540313310782,
      "grad_norm": 0.20084300637245178,
      "learning_rate": 9.99881689824633e-05,
      "loss": 6.9411,
      "step": 6
    },
    {
      "epoch": 0.0011494630365529246,
      "grad_norm": 0.19459381699562073,
      "learning_rate": 9.995268215087426e-05,
      "loss": 6.9339,
      "step": 7
    },
    {
      "epoch": 0.001313672041774771,
      "grad_norm": 0.20113036036491394,
      "learning_rate": 9.989355816502525e-05,
      "loss": 6.9377,
      "step": 8
    },
    {
      "epoch": 0.0014778810469966172,
      "grad_norm": 0.207706481218338,
      "learning_rate": 9.981082811366797e-05,
      "loss": 6.9359,
      "step": 9
    },
    {
      "epoch": 0.0016420900522184636,
      "grad_norm": 0.17941485345363617,
      "learning_rate": 9.970453549816632e-05,
      "loss": 6.9367,
      "step": 10
    },
    {
      "epoch": 0.00180629905744031,
      "grad_norm": 0.20235049724578857,
      "learning_rate": 9.957473620962246e-05,
      "loss": 6.9335,
      "step": 11
    },
    {
      "epoch": 0.0019705080626621564,
      "grad_norm": 0.2030957043170929,
      "learning_rate": 9.94214984994879e-05,
      "loss": 6.9291,
      "step": 12
    },
    {
      "epoch": 0.0021347170678840026,
      "grad_norm": 0.21488922834396362,
      "learning_rate": 9.924490294367533e-05,
      "loss": 6.9362,
      "step": 13
    },
    {
      "epoch": 0.0022989260731058492,
      "grad_norm": 0.2119452953338623,
      "learning_rate": 9.904504240019e-05,
      "loss": 6.9315,
      "step": 14
    },
    {
      "epoch": 0.0024631350783276954,
      "grad_norm": 0.2180822491645813,
      "learning_rate": 9.88220219603028e-05,
      "loss": 6.9296,
      "step": 15
    },
    {
      "epoch": 0.002627344083549542,
      "grad_norm": 0.19745118916034698,
      "learning_rate": 9.85759588932908e-05,
      "loss": 6.9261,
      "step": 16
    },
    {
      "epoch": 0.0027915530887713882,
      "grad_norm": 0.20321951806545258,
      "learning_rate": 9.830698258477458e-05,
      "loss": 6.9285,
      "step": 17
    },
    {
      "epoch": 0.0029557620939932344,
      "grad_norm": 0.2177504599094391,
      "learning_rate": 9.801523446868399e-05,
      "loss": 6.9288,
      "step": 18
    },
    {
      "epoch": 0.003119971099215081,
      "grad_norm": 0.21581852436065674,
      "learning_rate": 9.770086795288913e-05,
      "loss": 6.9266,
      "step": 19
    },
    {
      "epoch": 0.0032841801044369272,
      "grad_norm": 0.22525547444820404,
      "learning_rate": 9.736404833853502e-05,
      "loss": 6.9243,
      "step": 20
    },
    {
      "epoch": 0.003448389109658774,
      "grad_norm": 0.24186238646507263,
      "learning_rate": 9.700495273312223e-05,
      "loss": 6.92,
      "step": 21
    },
    {
      "epoch": 0.00361259811488062,
      "grad_norm": 0.21517185866832733,
      "learning_rate": 9.662376995737989e-05,
      "loss": 6.9185,
      "step": 22
    },
    {
      "epoch": 0.0037768071201024662,
      "grad_norm": 0.20967762172222137,
      "learning_rate": 9.622070044597935e-05,
      "loss": 6.9212,
      "step": 23
    },
    {
      "epoch": 0.003941016125324313,
      "grad_norm": 0.23955151438713074,
      "learning_rate": 9.579595614214087e-05,
      "loss": 6.9187,
      "step": 24
    },
    {
      "epoch": 0.0041052251305461595,
      "grad_norm": 0.228123739361763,
      "learning_rate": 9.534976038618931e-05,
      "loss": 6.9194,
      "step": 25
    },
    {
      "epoch": 0.0041052251305461595,
      "eval_loss": 6.920280933380127,
      "eval_runtime": 0.1615,
      "eval_samples_per_second": 309.601,
      "eval_steps_per_second": 55.728,
      "step": 25
    },
    {
      "epoch": 0.004269434135768005,
      "grad_norm": 0.24925658106803894,
      "learning_rate": 9.488234779811635e-05,
      "loss": 6.9146,
      "step": 26
    },
    {
      "epoch": 0.004433643140989852,
      "grad_norm": 0.24287429451942444,
      "learning_rate": 9.439396415421204e-05,
      "loss": 6.9166,
      "step": 27
    },
    {
      "epoch": 0.0045978521462116985,
      "grad_norm": 0.2547374963760376,
      "learning_rate": 9.388486625782995e-05,
      "loss": 6.9139,
      "step": 28
    },
    {
      "epoch": 0.004762061151433544,
      "grad_norm": 0.2530066668987274,
      "learning_rate": 9.335532180435412e-05,
      "loss": 6.9132,
      "step": 29
    },
    {
      "epoch": 0.004926270156655391,
      "grad_norm": 0.2472325563430786,
      "learning_rate": 9.280560924043858e-05,
      "loss": 6.9146,
      "step": 30
    },
    {
      "epoch": 0.0050904791618772375,
      "grad_norm": 0.25216802954673767,
      "learning_rate": 9.223601761759367e-05,
      "loss": 6.915,
      "step": 31
    },
    {
      "epoch": 0.005254688167099084,
      "grad_norm": 0.2528752386569977,
      "learning_rate": 9.164684644019624e-05,
      "loss": 6.9113,
      "step": 32
    },
    {
      "epoch": 0.00541889717232093,
      "grad_norm": 0.24980315566062927,
      "learning_rate": 9.103840550800329e-05,
      "loss": 6.9052,
      "step": 33
    },
    {
      "epoch": 0.0055831061775427765,
      "grad_norm": 0.26271212100982666,
      "learning_rate": 9.041101475325209e-05,
      "loss": 6.9033,
      "step": 34
    },
    {
      "epoch": 0.005747315182764623,
      "grad_norm": 0.2919352948665619,
      "learning_rate": 8.976500407243247e-05,
      "loss": 6.9013,
      "step": 35
    },
    {
      "epoch": 0.005911524187986469,
      "grad_norm": 0.27319350838661194,
      "learning_rate": 8.910071315281975e-05,
      "loss": 6.9097,
      "step": 36
    },
    {
      "epoch": 0.0060757331932083155,
      "grad_norm": 0.29328155517578125,
      "learning_rate": 8.841849129385921e-05,
      "loss": 6.904,
      "step": 37
    },
    {
      "epoch": 0.006239942198430162,
      "grad_norm": 0.2633126676082611,
      "learning_rate": 8.771869722349651e-05,
      "loss": 6.9065,
      "step": 38
    },
    {
      "epoch": 0.006404151203652008,
      "grad_norm": 0.2593594789505005,
      "learning_rate": 8.700169890955027e-05,
      "loss": 6.8937,
      "step": 39
    },
    {
      "epoch": 0.0065683602088738545,
      "grad_norm": 0.2693818211555481,
      "learning_rate": 8.626787336622607e-05,
      "loss": 6.9007,
      "step": 40
    },
    {
      "epoch": 0.006732569214095701,
      "grad_norm": 0.29676708579063416,
      "learning_rate": 8.55176064558738e-05,
      "loss": 6.8984,
      "step": 41
    },
    {
      "epoch": 0.006896778219317548,
      "grad_norm": 0.3058428168296814,
      "learning_rate": 8.475129268609227e-05,
      "loss": 6.8938,
      "step": 42
    },
    {
      "epoch": 0.0070609872245393935,
      "grad_norm": 0.322170615196228,
      "learning_rate": 8.396933500228808e-05,
      "loss": 6.8846,
      "step": 43
    },
    {
      "epoch": 0.00722519622976124,
      "grad_norm": 0.34816184639930725,
      "learning_rate": 8.317214457579773e-05,
      "loss": 6.8835,
      "step": 44
    },
    {
      "epoch": 0.007389405234983087,
      "grad_norm": 0.32875800132751465,
      "learning_rate": 8.23601405876841e-05,
      "loss": 6.8953,
      "step": 45
    },
    {
      "epoch": 0.0075536142402049325,
      "grad_norm": 0.35221952199935913,
      "learning_rate": 8.153375000832157e-05,
      "loss": 6.8907,
      "step": 46
    },
    {
      "epoch": 0.007717823245426779,
      "grad_norm": 0.363881915807724,
      "learning_rate": 8.069340737288512e-05,
      "loss": 6.8798,
      "step": 47
    },
    {
      "epoch": 0.007882032250648626,
      "grad_norm": 0.44765737652778625,
      "learning_rate": 7.98395545528617e-05,
      "loss": 6.8804,
      "step": 48
    },
    {
      "epoch": 0.008046241255870471,
      "grad_norm": 0.48432090878486633,
      "learning_rate": 7.897264052370409e-05,
      "loss": 6.8855,
      "step": 49
    },
    {
      "epoch": 0.008210450261092319,
      "grad_norm": 0.6992037296295166,
      "learning_rate": 7.809312112874924e-05,
      "loss": 6.8687,
      "step": 50
    },
    {
      "epoch": 0.008210450261092319,
      "eval_loss": 6.891801834106445,
      "eval_runtime": 0.1484,
      "eval_samples_per_second": 336.864,
      "eval_steps_per_second": 60.635,
      "step": 50
    },
    {
      "epoch": 0.008374659266314165,
      "grad_norm": 0.23850670456886292,
      "learning_rate": 7.720145883952544e-05,
      "loss": 6.8929,
      "step": 51
    },
    {
      "epoch": 0.00853886827153601,
      "grad_norm": 0.23872490227222443,
      "learning_rate": 7.629812251257401e-05,
      "loss": 6.8878,
      "step": 52
    },
    {
      "epoch": 0.008703077276757858,
      "grad_norm": 0.25407809019088745,
      "learning_rate": 7.53835871429139e-05,
      "loss": 6.8843,
      "step": 53
    },
    {
      "epoch": 0.008867286281979704,
      "grad_norm": 0.23447373509407043,
      "learning_rate": 7.445833361427828e-05,
      "loss": 6.8832,
      "step": 54
    },
    {
      "epoch": 0.00903149528720155,
      "grad_norm": 0.2190893590450287,
      "learning_rate": 7.352284844625481e-05,
      "loss": 6.8877,
      "step": 55
    },
    {
      "epoch": 0.009195704292423397,
      "grad_norm": 0.22474221885204315,
      "learning_rate": 7.257762353846257e-05,
      "loss": 6.8882,
      "step": 56
    },
    {
      "epoch": 0.009359913297645243,
      "grad_norm": 0.21934156119823456,
      "learning_rate": 7.162315591189978e-05,
      "loss": 6.884,
      "step": 57
    },
    {
      "epoch": 0.009524122302867088,
      "grad_norm": 0.22677500545978546,
      "learning_rate": 7.065994744759879e-05,
      "loss": 6.888,
      "step": 58
    },
    {
      "epoch": 0.009688331308088936,
      "grad_norm": 0.21878303587436676,
      "learning_rate": 6.96885046227255e-05,
      "loss": 6.8825,
      "step": 59
    },
    {
      "epoch": 0.009852540313310782,
      "grad_norm": 0.21341657638549805,
      "learning_rate": 6.8709338244262e-05,
      "loss": 6.8831,
      "step": 60
    },
    {
      "epoch": 0.010016749318532627,
      "grad_norm": 0.20267024636268616,
      "learning_rate": 6.772296318041253e-05,
      "loss": 6.884,
      "step": 61
    },
    {
      "epoch": 0.010180958323754475,
      "grad_norm": 0.19920732080936432,
      "learning_rate": 6.672989808987385e-05,
      "loss": 6.8809,
      "step": 62
    },
    {
      "epoch": 0.01034516732897632,
      "grad_norm": 0.2178788185119629,
      "learning_rate": 6.573066514911273e-05,
      "loss": 6.8755,
      "step": 63
    },
    {
      "epoch": 0.010509376334198168,
      "grad_norm": 0.210996612906456,
      "learning_rate": 6.472578977779339e-05,
      "loss": 6.8735,
      "step": 64
    },
    {
      "epoch": 0.010673585339420014,
      "grad_norm": 0.19956302642822266,
      "learning_rate": 6.371580036249985e-05,
      "loss": 6.8776,
      "step": 65
    },
    {
      "epoch": 0.01083779434464186,
      "grad_norm": 0.20250411331653595,
      "learning_rate": 6.270122797889806e-05,
      "loss": 6.8728,
      "step": 66
    },
    {
      "epoch": 0.011002003349863707,
      "grad_norm": 0.20616371929645538,
      "learning_rate": 6.168260611248417e-05,
      "loss": 6.8754,
      "step": 67
    },
    {
      "epoch": 0.011166212355085553,
      "grad_norm": 0.2024230659008026,
      "learning_rate": 6.066047037806549e-05,
      "loss": 6.8729,
      "step": 68
    },
    {
      "epoch": 0.011330421360307399,
      "grad_norm": 0.19448429346084595,
      "learning_rate": 5.9635358238121954e-05,
      "loss": 6.872,
      "step": 69
    },
    {
      "epoch": 0.011494630365529246,
      "grad_norm": 0.18782764673233032,
      "learning_rate": 5.860780872019601e-05,
      "loss": 6.8785,
      "step": 70
    },
    {
      "epoch": 0.011658839370751092,
      "grad_norm": 0.1987268328666687,
      "learning_rate": 5.7578362133459494e-05,
      "loss": 6.868,
      "step": 71
    },
    {
      "epoch": 0.011823048375972938,
      "grad_norm": 0.1736784130334854,
      "learning_rate": 5.6547559784606675e-05,
      "loss": 6.8722,
      "step": 72
    },
    {
      "epoch": 0.011987257381194785,
      "grad_norm": 0.17477966845035553,
      "learning_rate": 5.551594369322271e-05,
      "loss": 6.8697,
      "step": 73
    },
    {
      "epoch": 0.012151466386416631,
      "grad_norm": 0.19487003982067108,
      "learning_rate": 5.44840563067773e-05,
      "loss": 6.8721,
      "step": 74
    },
    {
      "epoch": 0.012315675391638477,
      "grad_norm": 0.20581433176994324,
      "learning_rate": 5.3452440215393315e-05,
      "loss": 6.8684,
      "step": 75
    },
    {
      "epoch": 0.012315675391638477,
      "eval_loss": 6.876440048217773,
      "eval_runtime": 0.1565,
      "eval_samples_per_second": 319.512,
      "eval_steps_per_second": 57.512,
      "step": 75
    },
    {
      "epoch": 0.012479884396860324,
      "grad_norm": 0.18974201381206512,
      "learning_rate": 5.242163786654051e-05,
      "loss": 6.8705,
      "step": 76
    },
    {
      "epoch": 0.01264409340208217,
      "grad_norm": 0.19225001335144043,
      "learning_rate": 5.139219127980399e-05,
      "loss": 6.866,
      "step": 77
    },
    {
      "epoch": 0.012808302407304016,
      "grad_norm": 0.18675003945827484,
      "learning_rate": 5.036464176187806e-05,
      "loss": 6.8649,
      "step": 78
    },
    {
      "epoch": 0.012972511412525863,
      "grad_norm": 0.19270944595336914,
      "learning_rate": 4.933952962193452e-05,
      "loss": 6.8649,
      "step": 79
    },
    {
      "epoch": 0.013136720417747709,
      "grad_norm": 0.22745263576507568,
      "learning_rate": 4.831739388751584e-05,
      "loss": 6.8694,
      "step": 80
    },
    {
      "epoch": 0.013300929422969556,
      "grad_norm": 0.2070324420928955,
      "learning_rate": 4.729877202110195e-05,
      "loss": 6.8627,
      "step": 81
    },
    {
      "epoch": 0.013465138428191402,
      "grad_norm": 0.188340425491333,
      "learning_rate": 4.628419963750016e-05,
      "loss": 6.8661,
      "step": 82
    },
    {
      "epoch": 0.013629347433413248,
      "grad_norm": 0.19918252527713776,
      "learning_rate": 4.527421022220663e-05,
      "loss": 6.8717,
      "step": 83
    },
    {
      "epoch": 0.013793556438635095,
      "grad_norm": 0.20109033584594727,
      "learning_rate": 4.426933485088729e-05,
      "loss": 6.8634,
      "step": 84
    },
    {
      "epoch": 0.013957765443856941,
      "grad_norm": 0.21436934173107147,
      "learning_rate": 4.327010191012617e-05,
      "loss": 6.8627,
      "step": 85
    },
    {
      "epoch": 0.014121974449078787,
      "grad_norm": 0.2412048876285553,
      "learning_rate": 4.227703681958749e-05,
      "loss": 6.8715,
      "step": 86
    },
    {
      "epoch": 0.014286183454300634,
      "grad_norm": 0.21019670367240906,
      "learning_rate": 4.1290661755738e-05,
      "loss": 6.8719,
      "step": 87
    },
    {
      "epoch": 0.01445039245952248,
      "grad_norm": 0.23564065992832184,
      "learning_rate": 4.03114953772745e-05,
      "loss": 6.8576,
      "step": 88
    },
    {
      "epoch": 0.014614601464744326,
      "grad_norm": 0.23380470275878906,
      "learning_rate": 3.934005255240122e-05,
      "loss": 6.8628,
      "step": 89
    },
    {
      "epoch": 0.014778810469966173,
      "grad_norm": 0.2209361046552658,
      "learning_rate": 3.837684408810023e-05,
      "loss": 6.8548,
      "step": 90
    },
    {
      "epoch": 0.01494301947518802,
      "grad_norm": 0.23763206601142883,
      "learning_rate": 3.7422376461537435e-05,
      "loss": 6.8656,
      "step": 91
    },
    {
      "epoch": 0.015107228480409865,
      "grad_norm": 0.2792171537876129,
      "learning_rate": 3.647715155374519e-05,
      "loss": 6.8605,
      "step": 92
    },
    {
      "epoch": 0.015271437485631712,
      "grad_norm": 0.2585715353488922,
      "learning_rate": 3.554166638572175e-05,
      "loss": 6.8548,
      "step": 93
    },
    {
      "epoch": 0.015435646490853558,
      "grad_norm": 0.2927703559398651,
      "learning_rate": 3.461641285708611e-05,
      "loss": 6.8593,
      "step": 94
    },
    {
      "epoch": 0.015599855496075404,
      "grad_norm": 0.31993693113327026,
      "learning_rate": 3.370187748742601e-05,
      "loss": 6.8542,
      "step": 95
    },
    {
      "epoch": 0.01576406450129725,
      "grad_norm": 0.3359384834766388,
      "learning_rate": 3.279854116047457e-05,
      "loss": 6.8526,
      "step": 96
    },
    {
      "epoch": 0.015928273506519097,
      "grad_norm": 0.3367615044116974,
      "learning_rate": 3.190687887125077e-05,
      "loss": 6.8617,
      "step": 97
    },
    {
      "epoch": 0.016092482511740943,
      "grad_norm": 0.4395657181739807,
      "learning_rate": 3.102735947629594e-05,
      "loss": 6.8552,
      "step": 98
    },
    {
      "epoch": 0.01625669151696279,
      "grad_norm": 0.5388174653053284,
      "learning_rate": 3.0160445447138308e-05,
      "loss": 6.8528,
      "step": 99
    },
    {
      "epoch": 0.016420900522184638,
      "grad_norm": 0.9235198497772217,
      "learning_rate": 2.9306592627114883e-05,
      "loss": 6.8479,
      "step": 100
    },
    {
      "epoch": 0.016420900522184638,
      "eval_loss": 6.867819786071777,
      "eval_runtime": 0.1615,
      "eval_samples_per_second": 309.663,
      "eval_steps_per_second": 55.739,
      "step": 100
    },
    {
      "epoch": 0.016585109527406484,
      "grad_norm": 0.15563268959522247,
      "learning_rate": 2.846624999167843e-05,
      "loss": 6.8711,
      "step": 101
    },
    {
      "epoch": 0.01674931853262833,
      "grad_norm": 0.16060693562030792,
      "learning_rate": 2.7639859412315917e-05,
      "loss": 6.8678,
      "step": 102
    },
    {
      "epoch": 0.016913527537850175,
      "grad_norm": 0.16681014001369476,
      "learning_rate": 2.682785542420229e-05,
      "loss": 6.869,
      "step": 103
    },
    {
      "epoch": 0.01707773654307202,
      "grad_norm": 0.14798974990844727,
      "learning_rate": 2.603066499771192e-05,
      "loss": 6.8684,
      "step": 104
    },
    {
      "epoch": 0.017241945548293867,
      "grad_norm": 0.15809090435504913,
      "learning_rate": 2.5248707313907747e-05,
      "loss": 6.8688,
      "step": 105
    },
    {
      "epoch": 0.017406154553515716,
      "grad_norm": 0.15511666238307953,
      "learning_rate": 2.4482393544126215e-05,
      "loss": 6.8664,
      "step": 106
    },
    {
      "epoch": 0.01757036355873756,
      "grad_norm": 0.1479611098766327,
      "learning_rate": 2.3732126633773928e-05,
      "loss": 6.8648,
      "step": 107
    },
    {
      "epoch": 0.017734572563959407,
      "grad_norm": 0.14057576656341553,
      "learning_rate": 2.2998301090449738e-05,
      "loss": 6.8655,
      "step": 108
    },
    {
      "epoch": 0.017898781569181253,
      "grad_norm": 0.1501767635345459,
      "learning_rate": 2.2281302776503497e-05,
      "loss": 6.8658,
      "step": 109
    },
    {
      "epoch": 0.0180629905744031,
      "grad_norm": 0.148577019572258,
      "learning_rate": 2.1581508706140802e-05,
      "loss": 6.8669,
      "step": 110
    },
    {
      "epoch": 0.018227199579624948,
      "grad_norm": 0.1595151275396347,
      "learning_rate": 2.0899286847180243e-05,
      "loss": 6.8665,
      "step": 111
    },
    {
      "epoch": 0.018391408584846794,
      "grad_norm": 0.14874982833862305,
      "learning_rate": 2.0234995927567523e-05,
      "loss": 6.8682,
      "step": 112
    },
    {
      "epoch": 0.01855561759006864,
      "grad_norm": 0.16214674711227417,
      "learning_rate": 1.9588985246747925e-05,
      "loss": 6.8665,
      "step": 113
    },
    {
      "epoch": 0.018719826595290485,
      "grad_norm": 0.17474649846553802,
      "learning_rate": 1.896159449199672e-05,
      "loss": 6.8564,
      "step": 114
    },
    {
      "epoch": 0.01888403560051233,
      "grad_norm": 0.17561936378479004,
      "learning_rate": 1.835315355980376e-05,
      "loss": 6.8664,
      "step": 115
    },
    {
      "epoch": 0.019048244605734177,
      "grad_norm": 0.155584454536438,
      "learning_rate": 1.7763982382406352e-05,
      "loss": 6.8678,
      "step": 116
    },
    {
      "epoch": 0.019212453610956026,
      "grad_norm": 0.161391481757164,
      "learning_rate": 1.7194390759561453e-05,
      "loss": 6.8642,
      "step": 117
    },
    {
      "epoch": 0.019376662616177872,
      "grad_norm": 0.17117644846439362,
      "learning_rate": 1.664467819564588e-05,
      "loss": 6.8658,
      "step": 118
    },
    {
      "epoch": 0.019540871621399718,
      "grad_norm": 0.15190771222114563,
      "learning_rate": 1.6115133742170053e-05,
      "loss": 6.8577,
      "step": 119
    },
    {
      "epoch": 0.019705080626621563,
      "grad_norm": 0.15246935188770294,
      "learning_rate": 1.5606035845787987e-05,
      "loss": 6.8638,
      "step": 120
    },
    {
      "epoch": 0.01986928963184341,
      "grad_norm": 0.1524462103843689,
      "learning_rate": 1.511765220188367e-05,
      "loss": 6.8575,
      "step": 121
    },
    {
      "epoch": 0.020033498637065255,
      "grad_norm": 0.187980517745018,
      "learning_rate": 1.4650239613810693e-05,
      "loss": 6.8698,
      "step": 122
    },
    {
      "epoch": 0.020197707642287104,
      "grad_norm": 0.17757758498191833,
      "learning_rate": 1.4204043857859129e-05,
      "loss": 6.8604,
      "step": 123
    },
    {
      "epoch": 0.02036191664750895,
      "grad_norm": 0.15591758489608765,
      "learning_rate": 1.3779299554020672e-05,
      "loss": 6.8676,
      "step": 124
    },
    {
      "epoch": 0.020526125652730796,
      "grad_norm": 0.16383862495422363,
      "learning_rate": 1.3376230042620109e-05,
      "loss": 6.8664,
      "step": 125
    },
    {
      "epoch": 0.020526125652730796,
      "eval_loss": 6.865334510803223,
      "eval_runtime": 0.1697,
      "eval_samples_per_second": 294.637,
      "eval_steps_per_second": 53.035,
      "step": 125
    },
    {
      "epoch": 0.02069033465795264,
      "grad_norm": 0.1573963165283203,
      "learning_rate": 1.2995047266877775e-05,
      "loss": 6.8635,
      "step": 126
    },
    {
      "epoch": 0.020854543663174487,
      "grad_norm": 0.17853564023971558,
      "learning_rate": 1.2635951661464995e-05,
      "loss": 6.8617,
      "step": 127
    },
    {
      "epoch": 0.021018752668396336,
      "grad_norm": 0.1534070074558258,
      "learning_rate": 1.2299132047110876e-05,
      "loss": 6.8606,
      "step": 128
    },
    {
      "epoch": 0.021182961673618182,
      "grad_norm": 0.16048799455165863,
      "learning_rate": 1.1984765531316038e-05,
      "loss": 6.8625,
      "step": 129
    },
    {
      "epoch": 0.021347170678840028,
      "grad_norm": 0.19437247514724731,
      "learning_rate": 1.1693017415225432e-05,
      "loss": 6.8558,
      "step": 130
    },
    {
      "epoch": 0.021511379684061874,
      "grad_norm": 0.1909008026123047,
      "learning_rate": 1.1424041106709194e-05,
      "loss": 6.862,
      "step": 131
    },
    {
      "epoch": 0.02167558868928372,
      "grad_norm": 0.18675506114959717,
      "learning_rate": 1.1177978039697217e-05,
      "loss": 6.8617,
      "step": 132
    },
    {
      "epoch": 0.021839797694505565,
      "grad_norm": 0.19926699995994568,
      "learning_rate": 1.0954957599810003e-05,
      "loss": 6.8532,
      "step": 133
    },
    {
      "epoch": 0.022004006699727414,
      "grad_norm": 0.18085141479969025,
      "learning_rate": 1.0755097056324672e-05,
      "loss": 6.8539,
      "step": 134
    },
    {
      "epoch": 0.02216821570494926,
      "grad_norm": 0.19364075362682343,
      "learning_rate": 1.0578501500512109e-05,
      "loss": 6.8556,
      "step": 135
    },
    {
      "epoch": 0.022332424710171106,
      "grad_norm": 0.21459238231182098,
      "learning_rate": 1.042526379037754e-05,
      "loss": 6.8641,
      "step": 136
    },
    {
      "epoch": 0.02249663371539295,
      "grad_norm": 0.1923208385705948,
      "learning_rate": 1.0295464501833682e-05,
      "loss": 6.8561,
      "step": 137
    },
    {
      "epoch": 0.022660842720614797,
      "grad_norm": 0.18413542211055756,
      "learning_rate": 1.0189171886332038e-05,
      "loss": 6.8555,
      "step": 138
    },
    {
      "epoch": 0.022825051725836643,
      "grad_norm": 0.21922001242637634,
      "learning_rate": 1.0106441834974748e-05,
      "loss": 6.8461,
      "step": 139
    },
    {
      "epoch": 0.022989260731058492,
      "grad_norm": 0.24008332192897797,
      "learning_rate": 1.0047317849125743e-05,
      "loss": 6.863,
      "step": 140
    },
    {
      "epoch": 0.023153469736280338,
      "grad_norm": 0.23309044539928436,
      "learning_rate": 1.0011831017536722e-05,
      "loss": 6.8683,
      "step": 141
    },
    {
      "epoch": 0.023317678741502184,
      "grad_norm": 0.2427399456501007,
      "learning_rate": 1e-05,
      "loss": 6.8653,
      "step": 142
    }
  ],
  "logging_steps": 1,
  "max_steps": 142,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3803377213440.0,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}