|
{ |
|
"best_metric": 1.2475459575653076, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-100", |
|
"epoch": 0.2748800495279369, |
|
"eval_steps": 25, |
|
"global_step": 111, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.002476396842594026, |
|
"grad_norm": 0.7329409122467041, |
|
"learning_rate": 9.999999999999999e-05, |
|
"loss": 2.4297, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.002476396842594026, |
|
"eval_loss": 2.370262622833252, |
|
"eval_runtime": 1.0104, |
|
"eval_samples_per_second": 49.483, |
|
"eval_steps_per_second": 12.866, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.004952793685188052, |
|
"grad_norm": 0.7226835489273071, |
|
"learning_rate": 0.00019999999999999998, |
|
"loss": 2.0074, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.007429190527782077, |
|
"grad_norm": 0.7592707276344299, |
|
"learning_rate": 0.0003, |
|
"loss": 2.0726, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.009905587370376103, |
|
"grad_norm": 0.8533850312232971, |
|
"learning_rate": 0.0002999428882610971, |
|
"loss": 1.9296, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.012381984212970128, |
|
"grad_norm": 0.7311192750930786, |
|
"learning_rate": 0.0002997716013666212, |
|
"loss": 1.7437, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.014858381055564154, |
|
"grad_norm": 0.6385353803634644, |
|
"learning_rate": 0.0002994862842423856, |
|
"loss": 1.7528, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.01733477789815818, |
|
"grad_norm": 0.8438978791236877, |
|
"learning_rate": 0.0002990871782951623, |
|
"loss": 1.6075, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.019811174740752207, |
|
"grad_norm": 0.9431717395782471, |
|
"learning_rate": 0.00029857462120842744, |
|
"loss": 1.6422, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02228757158334623, |
|
"grad_norm": 0.758976399898529, |
|
"learning_rate": 0.0002979490466566481, |
|
"loss": 1.6292, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.024763968425940256, |
|
"grad_norm": 0.6007578372955322, |
|
"learning_rate": 0.0002972109839383494, |
|
"loss": 1.5238, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.027240365268534284, |
|
"grad_norm": 0.4644957482814789, |
|
"learning_rate": 0.0002963610575282762, |
|
"loss": 1.4831, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.02971676211112831, |
|
"grad_norm": 0.5231449604034424, |
|
"learning_rate": 0.0002953999865490242, |
|
"loss": 1.4047, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03219315895372234, |
|
"grad_norm": 0.6303499937057495, |
|
"learning_rate": 0.00029432858416259097, |
|
"loss": 1.8963, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03466955579631636, |
|
"grad_norm": 0.7002948522567749, |
|
"learning_rate": 0.0002931477568823596, |
|
"loss": 1.5882, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.037145952638910386, |
|
"grad_norm": 0.48810043931007385, |
|
"learning_rate": 0.00029185850380609757, |
|
"loss": 1.5572, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.039622349481504414, |
|
"grad_norm": 0.4367665648460388, |
|
"learning_rate": 0.000290461915770621, |
|
"loss": 1.4534, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.042098746324098435, |
|
"grad_norm": 0.39929234981536865, |
|
"learning_rate": 0.00028895917442883697, |
|
"loss": 1.4147, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04457514316669246, |
|
"grad_norm": 0.38216057419776917, |
|
"learning_rate": 0.00028735155124994774, |
|
"loss": 1.438, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.04705154000928649, |
|
"grad_norm": 0.3422548770904541, |
|
"learning_rate": 0.0002856404064436606, |
|
"loss": 1.3886, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.04952793685188051, |
|
"grad_norm": 0.4055199921131134, |
|
"learning_rate": 0.000283827187809315, |
|
"loss": 1.47, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05200433369447454, |
|
"grad_norm": 0.42674520611763, |
|
"learning_rate": 0.0002819134295108992, |
|
"loss": 1.4333, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.05448073053706857, |
|
"grad_norm": 0.3820836544036865, |
|
"learning_rate": 0.00027990075077899494, |
|
"loss": 1.4092, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.05695712737966259, |
|
"grad_norm": 0.4357558786869049, |
|
"learning_rate": 0.0002777908545407464, |
|
"loss": 1.4545, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.05943352422225662, |
|
"grad_norm": 0.43220487236976624, |
|
"learning_rate": 0.0002755855259790139, |
|
"loss": 1.3423, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.061909921064850645, |
|
"grad_norm": 0.5457894206047058, |
|
"learning_rate": 0.0002732866310219309, |
|
"loss": 1.4758, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.061909921064850645, |
|
"eval_loss": 1.4271225929260254, |
|
"eval_runtime": 1.0088, |
|
"eval_samples_per_second": 49.566, |
|
"eval_steps_per_second": 12.887, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06438631790744467, |
|
"grad_norm": 0.6328326463699341, |
|
"learning_rate": 0.0002708961147641427, |
|
"loss": 1.6565, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.06686271475003869, |
|
"grad_norm": 0.39392906427383423, |
|
"learning_rate": 0.000268415999821062, |
|
"loss": 1.445, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.06933911159263272, |
|
"grad_norm": 0.40518683195114136, |
|
"learning_rate": 0.00026584838461753444, |
|
"loss": 1.4417, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07181550843522674, |
|
"grad_norm": 0.40206319093704224, |
|
"learning_rate": 0.00026319544161236156, |
|
"loss": 1.3806, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.07429190527782077, |
|
"grad_norm": 0.427487313747406, |
|
"learning_rate": 0.0002604594154601839, |
|
"loss": 1.3721, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0767683021204148, |
|
"grad_norm": 0.4002684950828552, |
|
"learning_rate": 0.00025764262111227905, |
|
"loss": 1.3487, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.07924469896300883, |
|
"grad_norm": 0.474567711353302, |
|
"learning_rate": 0.00025474744185788155, |
|
"loss": 1.3678, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08172109580560284, |
|
"grad_norm": 0.40476325154304504, |
|
"learning_rate": 0.0002517763273076828, |
|
"loss": 1.3158, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.08419749264819687, |
|
"grad_norm": 0.3845173418521881, |
|
"learning_rate": 0.00024873179132121507, |
|
"loss": 1.3835, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0866738894907909, |
|
"grad_norm": 0.3646795451641083, |
|
"learning_rate": 0.0002456164098798761, |
|
"loss": 1.3434, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08915028633338493, |
|
"grad_norm": 0.449341356754303, |
|
"learning_rate": 0.0002424328189073912, |
|
"loss": 1.297, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09162668317597895, |
|
"grad_norm": 0.44050559401512146, |
|
"learning_rate": 0.0002391837120395588, |
|
"loss": 1.2494, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.09410308001857298, |
|
"grad_norm": 0.5633939504623413, |
|
"learning_rate": 0.00023587183834516558, |
|
"loss": 1.6648, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.096579476861167, |
|
"grad_norm": 0.47090381383895874, |
|
"learning_rate": 0.00023249999999999999, |
|
"loss": 1.4137, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.09905587370376102, |
|
"grad_norm": 0.3539658486843109, |
|
"learning_rate": 0.00022907104991593143, |
|
"loss": 1.3808, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10153227054635505, |
|
"grad_norm": 0.33691152930259705, |
|
"learning_rate": 0.0002255878893270624, |
|
"loss": 1.2999, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.10400866738894908, |
|
"grad_norm": 0.3699648976325989, |
|
"learning_rate": 0.00022205346533499438, |
|
"loss": 1.3159, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.10648506423154311, |
|
"grad_norm": 0.3926730155944824, |
|
"learning_rate": 0.00021847076841528617, |
|
"loss": 1.2978, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.10896146107413714, |
|
"grad_norm": 0.43968045711517334, |
|
"learning_rate": 0.00021484282988721236, |
|
"loss": 1.3076, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.11143785791673115, |
|
"grad_norm": 0.4484625458717346, |
|
"learning_rate": 0.00021117271934896527, |
|
"loss": 1.3052, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11391425475932518, |
|
"grad_norm": 0.4811747670173645, |
|
"learning_rate": 0.00020746354208046782, |
|
"loss": 1.2317, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.1163906516019192, |
|
"grad_norm": 0.38913848996162415, |
|
"learning_rate": 0.00020371843641599718, |
|
"loss": 1.2534, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.11886704844451323, |
|
"grad_norm": 0.4041205048561096, |
|
"learning_rate": 0.0001999405710888403, |
|
"loss": 1.3195, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.12134344528710726, |
|
"grad_norm": 0.391659677028656, |
|
"learning_rate": 0.0001961331425502294, |
|
"loss": 1.2099, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.12381984212970129, |
|
"grad_norm": 0.4483822286128998, |
|
"learning_rate": 0.0001922993722648251, |
|
"loss": 1.2427, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12381984212970129, |
|
"eval_loss": 1.3279614448547363, |
|
"eval_runtime": 1.0087, |
|
"eval_samples_per_second": 49.57, |
|
"eval_steps_per_second": 12.888, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1262962389722953, |
|
"grad_norm": 0.5131921768188477, |
|
"learning_rate": 0.0001884425039850356, |
|
"loss": 1.602, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.12877263581488935, |
|
"grad_norm": 0.588230550289154, |
|
"learning_rate": 0.00018456580100647827, |
|
"loss": 1.3518, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.13124903265748336, |
|
"grad_norm": 0.5344261527061462, |
|
"learning_rate": 0.00018067254340690606, |
|
"loss": 1.2842, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.13372542950007738, |
|
"grad_norm": 0.42600345611572266, |
|
"learning_rate": 0.00017676602527093386, |
|
"loss": 1.2732, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.13620182634267142, |
|
"grad_norm": 0.3716365396976471, |
|
"learning_rate": 0.00017284955190291422, |
|
"loss": 1.3104, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.13867822318526543, |
|
"grad_norm": 0.36169472336769104, |
|
"learning_rate": 0.00016892643703032004, |
|
"loss": 1.2831, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.14115462002785947, |
|
"grad_norm": 0.38514360785484314, |
|
"learning_rate": 0.000165, |
|
"loss": 1.1825, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.1436310168704535, |
|
"grad_norm": 0.46233615279197693, |
|
"learning_rate": 0.00016107356296967993, |
|
"loss": 1.3065, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.14610741371304753, |
|
"grad_norm": 0.49720239639282227, |
|
"learning_rate": 0.00015715044809708577, |
|
"loss": 1.2713, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.14858381055564154, |
|
"grad_norm": 0.4614707827568054, |
|
"learning_rate": 0.0001532339747290661, |
|
"loss": 1.261, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15106020739823556, |
|
"grad_norm": 0.4578586220741272, |
|
"learning_rate": 0.00014932745659309386, |
|
"loss": 1.2485, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.1535366042408296, |
|
"grad_norm": 0.38602447509765625, |
|
"learning_rate": 0.00014543419899352172, |
|
"loss": 1.1659, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.1560130010834236, |
|
"grad_norm": 0.4391464293003082, |
|
"learning_rate": 0.0001415574960149644, |
|
"loss": 1.5451, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.15848939792601766, |
|
"grad_norm": 0.39024028182029724, |
|
"learning_rate": 0.0001377006277351749, |
|
"loss": 1.3855, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.16096579476861167, |
|
"grad_norm": 0.33916693925857544, |
|
"learning_rate": 0.00013386685744977056, |
|
"loss": 1.2976, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.16344219161120568, |
|
"grad_norm": 0.3237704634666443, |
|
"learning_rate": 0.00013005942891115968, |
|
"loss": 1.2733, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.16591858845379973, |
|
"grad_norm": 0.3709225654602051, |
|
"learning_rate": 0.00012628156358400285, |
|
"loss": 1.2839, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.16839498529639374, |
|
"grad_norm": 0.3892875611782074, |
|
"learning_rate": 0.00012253645791953217, |
|
"loss": 1.3312, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.17087138213898778, |
|
"grad_norm": 0.3935694992542267, |
|
"learning_rate": 0.00011882728065103471, |
|
"loss": 1.246, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.1733477789815818, |
|
"grad_norm": 0.3991238474845886, |
|
"learning_rate": 0.0001151571701127876, |
|
"loss": 1.2713, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.17582417582417584, |
|
"grad_norm": 0.39288103580474854, |
|
"learning_rate": 0.00011152923158471383, |
|
"loss": 1.228, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.17830057266676985, |
|
"grad_norm": 0.3925284743309021, |
|
"learning_rate": 0.0001079465346650056, |
|
"loss": 1.2817, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.18077696950936387, |
|
"grad_norm": 0.41174066066741943, |
|
"learning_rate": 0.00010441211067293761, |
|
"loss": 1.2499, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.1832533663519579, |
|
"grad_norm": 0.3850061595439911, |
|
"learning_rate": 0.00010092895008406854, |
|
"loss": 1.1749, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.18572976319455192, |
|
"grad_norm": 0.45076093077659607, |
|
"learning_rate": 9.750000000000003e-05, |
|
"loss": 1.2532, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.18572976319455192, |
|
"eval_loss": 1.2752010822296143, |
|
"eval_runtime": 1.015, |
|
"eval_samples_per_second": 49.259, |
|
"eval_steps_per_second": 12.807, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.18820616003714596, |
|
"grad_norm": 0.44986942410469055, |
|
"learning_rate": 9.412816165483439e-05, |
|
"loss": 1.6073, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.19068255687973998, |
|
"grad_norm": 0.35847336053848267, |
|
"learning_rate": 9.081628796044118e-05, |
|
"loss": 1.2849, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.193158953722334, |
|
"grad_norm": 0.3534252643585205, |
|
"learning_rate": 8.756718109260881e-05, |
|
"loss": 1.3032, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.19563535056492803, |
|
"grad_norm": 0.3720012903213501, |
|
"learning_rate": 8.438359012012389e-05, |
|
"loss": 1.2459, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.19811174740752205, |
|
"grad_norm": 0.3475172817707062, |
|
"learning_rate": 8.126820867878491e-05, |
|
"loss": 1.2174, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2005881442501161, |
|
"grad_norm": 0.3700581192970276, |
|
"learning_rate": 7.822367269231717e-05, |
|
"loss": 1.3342, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.2030645410927101, |
|
"grad_norm": 0.38341790437698364, |
|
"learning_rate": 7.525255814211838e-05, |
|
"loss": 1.2484, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.20554093793530412, |
|
"grad_norm": 0.3764133155345917, |
|
"learning_rate": 7.235737888772097e-05, |
|
"loss": 1.2419, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.20801733477789816, |
|
"grad_norm": 0.387975811958313, |
|
"learning_rate": 6.954058453981609e-05, |
|
"loss": 1.2377, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.21049373162049217, |
|
"grad_norm": 0.40386706590652466, |
|
"learning_rate": 6.680455838763842e-05, |
|
"loss": 1.206, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.21297012846308622, |
|
"grad_norm": 0.4010809063911438, |
|
"learning_rate": 6.415161538246557e-05, |
|
"loss": 1.2053, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.21544652530568023, |
|
"grad_norm": 0.420891135931015, |
|
"learning_rate": 6.158400017893799e-05, |
|
"loss": 1.2282, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.21792292214827427, |
|
"grad_norm": 0.44719812273979187, |
|
"learning_rate": 5.9103885235857274e-05, |
|
"loss": 1.5592, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.2203993189908683, |
|
"grad_norm": 0.36982178688049316, |
|
"learning_rate": 5.671336897806908e-05, |
|
"loss": 1.348, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.2228757158334623, |
|
"grad_norm": 0.3596123158931732, |
|
"learning_rate": 5.441447402098609e-05, |
|
"loss": 1.2514, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22535211267605634, |
|
"grad_norm": 0.3666294515132904, |
|
"learning_rate": 5.2209145459253604e-05, |
|
"loss": 1.2815, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.22782850951865036, |
|
"grad_norm": 0.3626820743083954, |
|
"learning_rate": 5.0099249221005035e-05, |
|
"loss": 1.242, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.2303049063612444, |
|
"grad_norm": 0.3683141767978668, |
|
"learning_rate": 4.808657048910077e-05, |
|
"loss": 1.2619, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.2327813032038384, |
|
"grad_norm": 0.38167521357536316, |
|
"learning_rate": 4.617281219068502e-05, |
|
"loss": 1.2324, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.23525770004643243, |
|
"grad_norm": 0.386264443397522, |
|
"learning_rate": 4.435959355633935e-05, |
|
"loss": 1.2487, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.23773409688902647, |
|
"grad_norm": 0.3971124589443207, |
|
"learning_rate": 4.264844875005226e-05, |
|
"loss": 1.2667, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.24021049373162048, |
|
"grad_norm": 0.39618241786956787, |
|
"learning_rate": 4.104082557116302e-05, |
|
"loss": 1.2715, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.24268689057421453, |
|
"grad_norm": 0.4155503809452057, |
|
"learning_rate": 3.953808422937896e-05, |
|
"loss": 1.199, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.24516328741680854, |
|
"grad_norm": 0.40676349401474, |
|
"learning_rate": 3.814149619390237e-05, |
|
"loss": 1.1614, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.24763968425940258, |
|
"grad_norm": 0.4450746178627014, |
|
"learning_rate": 3.685224311764042e-05, |
|
"loss": 1.185, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.24763968425940258, |
|
"eval_loss": 1.2475459575653076, |
|
"eval_runtime": 1.0194, |
|
"eval_samples_per_second": 49.047, |
|
"eval_steps_per_second": 12.752, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25011608110199657, |
|
"grad_norm": 0.4321860671043396, |
|
"learning_rate": 3.567141583740899e-05, |
|
"loss": 1.5005, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.2525924779445906, |
|
"grad_norm": 0.3553882837295532, |
|
"learning_rate": 3.4600013450975794e-05, |
|
"loss": 1.2593, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.25506887478718465, |
|
"grad_norm": 0.3538253605365753, |
|
"learning_rate": 3.3638942471723784e-05, |
|
"loss": 1.2722, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.2575452716297787, |
|
"grad_norm": 0.36580267548561096, |
|
"learning_rate": 3.2789016061650545e-05, |
|
"loss": 1.2305, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.2600216684723727, |
|
"grad_norm": 0.36455708742141724, |
|
"learning_rate": 3.205095334335192e-05, |
|
"loss": 1.2613, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2624980653149667, |
|
"grad_norm": 0.3923397660255432, |
|
"learning_rate": 3.1425378791572495e-05, |
|
"loss": 1.3292, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.26497446215756076, |
|
"grad_norm": 0.3707919120788574, |
|
"learning_rate": 3.0912821704837695e-05, |
|
"loss": 1.1993, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.26745085900015475, |
|
"grad_norm": 0.3740595281124115, |
|
"learning_rate": 3.051371575761435e-05, |
|
"loss": 1.1775, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.2699272558427488, |
|
"grad_norm": 0.3999345004558563, |
|
"learning_rate": 3.0228398633378795e-05, |
|
"loss": 1.198, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.27240365268534283, |
|
"grad_norm": 0.43420764803886414, |
|
"learning_rate": 3.005711173890292e-05, |
|
"loss": 1.2404, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2748800495279369, |
|
"grad_norm": 0.45214489102363586, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 1.2226, |
|
"step": 111 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 111, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 20, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.2015776107027497e+18, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|