{
  "best_metric": 2.227423667907715,
  "best_model_checkpoint": "miner_id_24/checkpoint-100",
  "epoch": 0.01278935925310142,
  "eval_steps": 50,
  "global_step": 100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0001278935925310142,
      "grad_norm": 0.6531283259391785,
      "learning_rate": 1.6666666666666668e-07,
      "loss": 1.572,
      "step": 1
    },
    {
      "epoch": 0.0001278935925310142,
      "eval_loss": 2.3208112716674805,
      "eval_runtime": 141.1671,
      "eval_samples_per_second": 23.32,
      "eval_steps_per_second": 2.919,
      "step": 1
    },
    {
      "epoch": 0.0002557871850620284,
      "grad_norm": 0.7421135902404785,
      "learning_rate": 3.3333333333333335e-07,
      "loss": 1.611,
      "step": 2
    },
    {
      "epoch": 0.0003836807775930426,
      "grad_norm": 0.8427505493164062,
      "learning_rate": 5.000000000000001e-07,
      "loss": 1.7915,
      "step": 3
    },
    {
      "epoch": 0.0005115743701240568,
      "grad_norm": 0.9320747256278992,
      "learning_rate": 6.666666666666667e-07,
      "loss": 1.8287,
      "step": 4
    },
    {
      "epoch": 0.000639467962655071,
      "grad_norm": 0.9002219438552856,
      "learning_rate": 8.333333333333333e-07,
      "loss": 1.7165,
      "step": 5
    },
    {
      "epoch": 0.0007673615551860852,
      "grad_norm": 0.9916430711746216,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.9549,
      "step": 6
    },
    {
      "epoch": 0.0008952551477170994,
      "grad_norm": 1.2199214696884155,
      "learning_rate": 1.1666666666666668e-06,
      "loss": 1.8518,
      "step": 7
    },
    {
      "epoch": 0.0010231487402481137,
      "grad_norm": 1.0491547584533691,
      "learning_rate": 1.3333333333333334e-06,
      "loss": 2.0224,
      "step": 8
    },
    {
      "epoch": 0.0011510423327791277,
      "grad_norm": 0.9636436700820923,
      "learning_rate": 1.5e-06,
      "loss": 1.7608,
      "step": 9
    },
    {
      "epoch": 0.001278935925310142,
      "grad_norm": 1.075803279876709,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 2.024,
      "step": 10
    },
    {
      "epoch": 0.001406829517841156,
      "grad_norm": 1.1795305013656616,
      "learning_rate": 1.8333333333333333e-06,
      "loss": 1.838,
      "step": 11
    },
    {
      "epoch": 0.0015347231103721704,
      "grad_norm": 1.0322589874267578,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 2.1333,
      "step": 12
    },
    {
      "epoch": 0.0016626167029031844,
      "grad_norm": 1.224502682685852,
      "learning_rate": 2.166666666666667e-06,
      "loss": 2.028,
      "step": 13
    },
    {
      "epoch": 0.0017905102954341987,
      "grad_norm": 1.1498585939407349,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 2.0027,
      "step": 14
    },
    {
      "epoch": 0.001918403887965213,
      "grad_norm": 1.244385838508606,
      "learning_rate": 2.5e-06,
      "loss": 1.9931,
      "step": 15
    },
    {
      "epoch": 0.0020462974804962273,
      "grad_norm": 1.036781668663025,
      "learning_rate": 2.666666666666667e-06,
      "loss": 1.7908,
      "step": 16
    },
    {
      "epoch": 0.002174191073027241,
      "grad_norm": 1.1222697496414185,
      "learning_rate": 2.8333333333333335e-06,
      "loss": 2.1778,
      "step": 17
    },
    {
      "epoch": 0.0023020846655582555,
      "grad_norm": 1.145642876625061,
      "learning_rate": 3e-06,
      "loss": 2.0192,
      "step": 18
    },
    {
      "epoch": 0.0024299782580892697,
      "grad_norm": 1.0589532852172852,
      "learning_rate": 3.1666666666666667e-06,
      "loss": 2.1066,
      "step": 19
    },
    {
      "epoch": 0.002557871850620284,
      "grad_norm": 1.317820429801941,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 2.0042,
      "step": 20
    },
    {
      "epoch": 0.0026857654431512983,
      "grad_norm": 1.223871111869812,
      "learning_rate": 3.5e-06,
      "loss": 2.012,
      "step": 21
    },
    {
      "epoch": 0.002813659035682312,
      "grad_norm": 1.3080352544784546,
      "learning_rate": 3.6666666666666666e-06,
      "loss": 2.0961,
      "step": 22
    },
    {
      "epoch": 0.0029415526282133265,
      "grad_norm": 1.4565980434417725,
      "learning_rate": 3.833333333333334e-06,
      "loss": 2.0999,
      "step": 23
    },
    {
      "epoch": 0.0030694462207443408,
      "grad_norm": 1.2557575702667236,
      "learning_rate": 4.000000000000001e-06,
      "loss": 2.1239,
      "step": 24
    },
    {
      "epoch": 0.003197339813275355,
      "grad_norm": 1.3740392923355103,
      "learning_rate": 4.166666666666667e-06,
      "loss": 2.354,
      "step": 25
    },
    {
      "epoch": 0.003325233405806369,
      "grad_norm": 1.4395512342453003,
      "learning_rate": 4.333333333333334e-06,
      "loss": 2.6028,
      "step": 26
    },
    {
      "epoch": 0.003453126998337383,
      "grad_norm": 1.3880198001861572,
      "learning_rate": 4.5e-06,
      "loss": 2.1235,
      "step": 27
    },
    {
      "epoch": 0.0035810205908683975,
      "grad_norm": 1.6280770301818848,
      "learning_rate": 4.666666666666667e-06,
      "loss": 2.2612,
      "step": 28
    },
    {
      "epoch": 0.0037089141833994118,
      "grad_norm": 1.5140023231506348,
      "learning_rate": 4.833333333333333e-06,
      "loss": 2.2578,
      "step": 29
    },
    {
      "epoch": 0.003836807775930426,
      "grad_norm": 1.3038387298583984,
      "learning_rate": 5e-06,
      "loss": 2.0897,
      "step": 30
    },
    {
      "epoch": 0.00396470136846144,
      "grad_norm": 1.3410831689834595,
      "learning_rate": 4.997482666353287e-06,
      "loss": 2.112,
      "step": 31
    },
    {
      "epoch": 0.004092594960992455,
      "grad_norm": 1.4958971738815308,
      "learning_rate": 4.989935734988098e-06,
      "loss": 2.4475,
      "step": 32
    },
    {
      "epoch": 0.004220488553523468,
      "grad_norm": 1.638153076171875,
      "learning_rate": 4.977374404419838e-06,
      "loss": 2.4451,
      "step": 33
    },
    {
      "epoch": 0.004348382146054482,
      "grad_norm": 1.6533570289611816,
      "learning_rate": 4.959823971496575e-06,
      "loss": 2.5101,
      "step": 34
    },
    {
      "epoch": 0.004476275738585497,
      "grad_norm": 1.6795432567596436,
      "learning_rate": 4.937319780454559e-06,
      "loss": 2.2065,
      "step": 35
    },
    {
      "epoch": 0.004604169331116511,
      "grad_norm": 1.6957430839538574,
      "learning_rate": 4.909907151739634e-06,
      "loss": 2.6026,
      "step": 36
    },
    {
      "epoch": 0.004732062923647525,
      "grad_norm": 1.6112371683120728,
      "learning_rate": 4.8776412907378845e-06,
      "loss": 2.5477,
      "step": 37
    },
    {
      "epoch": 0.0048599565161785395,
      "grad_norm": 1.9008305072784424,
      "learning_rate": 4.8405871765993435e-06,
      "loss": 2.5082,
      "step": 38
    },
    {
      "epoch": 0.004987850108709554,
      "grad_norm": 1.69205904006958,
      "learning_rate": 4.7988194313786275e-06,
      "loss": 2.6694,
      "step": 39
    },
    {
      "epoch": 0.005115743701240568,
      "grad_norm": 1.9658174514770508,
      "learning_rate": 4.752422169756048e-06,
      "loss": 3.1494,
      "step": 40
    },
    {
      "epoch": 0.005243637293771582,
      "grad_norm": 1.7338308095932007,
      "learning_rate": 4.701488829641845e-06,
      "loss": 2.5999,
      "step": 41
    },
    {
      "epoch": 0.005371530886302597,
      "grad_norm": 1.9238468408584595,
      "learning_rate": 4.646121984004666e-06,
      "loss": 2.7518,
      "step": 42
    },
    {
      "epoch": 0.00549942447883361,
      "grad_norm": 1.9128526449203491,
      "learning_rate": 4.586433134303257e-06,
      "loss": 2.4259,
      "step": 43
    },
    {
      "epoch": 0.005627318071364624,
      "grad_norm": 1.9675108194351196,
      "learning_rate": 4.522542485937369e-06,
      "loss": 2.5403,
      "step": 44
    },
    {
      "epoch": 0.005755211663895639,
      "grad_norm": 2.1037259101867676,
      "learning_rate": 4.454578706170075e-06,
      "loss": 3.0036,
      "step": 45
    },
    {
      "epoch": 0.005883105256426653,
      "grad_norm": 2.1286771297454834,
      "learning_rate": 4.382678665009028e-06,
      "loss": 2.9692,
      "step": 46
    },
    {
      "epoch": 0.006010998848957667,
      "grad_norm": 2.4412331581115723,
      "learning_rate": 4.3069871595684795e-06,
      "loss": 3.1744,
      "step": 47
    },
    {
      "epoch": 0.0061388924414886815,
      "grad_norm": 2.340425491333008,
      "learning_rate": 4.227656622467162e-06,
      "loss": 3.1443,
      "step": 48
    },
    {
      "epoch": 0.006266786034019696,
      "grad_norm": 2.878222703933716,
      "learning_rate": 4.144846814849282e-06,
      "loss": 3.6609,
      "step": 49
    },
    {
      "epoch": 0.00639467962655071,
      "grad_norm": 4.124127388000488,
      "learning_rate": 4.058724504646834e-06,
      "loss": 4.5455,
      "step": 50
    },
    {
      "epoch": 0.00639467962655071,
      "eval_loss": 2.273921012878418,
      "eval_runtime": 141.3295,
      "eval_samples_per_second": 23.293,
      "eval_steps_per_second": 2.915,
      "step": 50
    },
    {
      "epoch": 0.006522573219081724,
      "grad_norm": 0.824315071105957,
      "learning_rate": 3.969463130731183e-06,
      "loss": 1.7344,
      "step": 51
    },
    {
      "epoch": 0.006650466811612738,
      "grad_norm": 0.8575537204742432,
      "learning_rate": 3.8772424536302565e-06,
      "loss": 1.6085,
      "step": 52
    },
    {
      "epoch": 0.006778360404143752,
      "grad_norm": 1.030655860900879,
      "learning_rate": 3.782248193514766e-06,
      "loss": 1.6915,
      "step": 53
    },
    {
      "epoch": 0.006906253996674766,
      "grad_norm": 0.9780040979385376,
      "learning_rate": 3.684671656182497e-06,
      "loss": 1.8391,
      "step": 54
    },
    {
      "epoch": 0.007034147589205781,
      "grad_norm": 1.075355887413025,
      "learning_rate": 3.5847093477938955e-06,
      "loss": 1.7932,
      "step": 55
    },
    {
      "epoch": 0.007162041181736795,
      "grad_norm": 1.0918196439743042,
      "learning_rate": 3.4825625791348093e-06,
      "loss": 1.7666,
      "step": 56
    },
    {
      "epoch": 0.007289934774267809,
      "grad_norm": 1.0339546203613281,
      "learning_rate": 3.3784370602033572e-06,
      "loss": 1.8442,
      "step": 57
    },
    {
      "epoch": 0.0074178283667988235,
      "grad_norm": 1.178619384765625,
      "learning_rate": 3.272542485937369e-06,
      "loss": 1.9234,
      "step": 58
    },
    {
      "epoch": 0.007545721959329838,
      "grad_norm": 1.1075314283370972,
      "learning_rate": 3.165092113916688e-06,
      "loss": 1.9327,
      "step": 59
    },
    {
      "epoch": 0.007673615551860852,
      "grad_norm": 1.2657276391983032,
      "learning_rate": 3.056302334890786e-06,
      "loss": 1.8388,
      "step": 60
    },
    {
      "epoch": 0.007801509144391866,
      "grad_norm": 1.1688446998596191,
      "learning_rate": 2.946392236996592e-06,
      "loss": 1.7806,
      "step": 61
    },
    {
      "epoch": 0.00792940273692288,
      "grad_norm": 1.1615688800811768,
      "learning_rate": 2.835583164544139e-06,
      "loss": 1.9443,
      "step": 62
    },
    {
      "epoch": 0.008057296329453895,
      "grad_norm": 1.2162307500839233,
      "learning_rate": 2.724098272258584e-06,
      "loss": 1.7797,
      "step": 63
    },
    {
      "epoch": 0.00818518992198491,
      "grad_norm": 1.4072235822677612,
      "learning_rate": 2.6121620758762877e-06,
      "loss": 1.7035,
      "step": 64
    },
    {
      "epoch": 0.008313083514515924,
      "grad_norm": 1.484116792678833,
      "learning_rate": 2.5e-06,
      "loss": 2.0504,
      "step": 65
    },
    {
      "epoch": 0.008440977107046936,
      "grad_norm": 1.3416109085083008,
      "learning_rate": 2.3878379241237136e-06,
      "loss": 1.9764,
      "step": 66
    },
    {
      "epoch": 0.00856887069957795,
      "grad_norm": 1.3301259279251099,
      "learning_rate": 2.2759017277414165e-06,
      "loss": 1.951,
      "step": 67
    },
    {
      "epoch": 0.008696764292108965,
      "grad_norm": 1.3805782794952393,
      "learning_rate": 2.1644168354558623e-06,
      "loss": 2.0396,
      "step": 68
    },
    {
      "epoch": 0.008824657884639979,
      "grad_norm": 1.2760380506515503,
      "learning_rate": 2.053607763003409e-06,
      "loss": 1.9385,
      "step": 69
    },
    {
      "epoch": 0.008952551477170993,
      "grad_norm": 1.4715478420257568,
      "learning_rate": 1.9436976651092143e-06,
      "loss": 2.2723,
      "step": 70
    },
    {
      "epoch": 0.009080445069702008,
      "grad_norm": 1.3770544528961182,
      "learning_rate": 1.8349078860833125e-06,
      "loss": 1.888,
      "step": 71
    },
    {
      "epoch": 0.009208338662233022,
      "grad_norm": 1.2823768854141235,
      "learning_rate": 1.7274575140626318e-06,
      "loss": 2.0006,
      "step": 72
    },
    {
      "epoch": 0.009336232254764036,
      "grad_norm": 1.366890788078308,
      "learning_rate": 1.6215629397966432e-06,
      "loss": 1.8851,
      "step": 73
    },
    {
      "epoch": 0.00946412584729505,
      "grad_norm": 1.4455935955047607,
      "learning_rate": 1.5174374208651913e-06,
      "loss": 2.0491,
      "step": 74
    },
    {
      "epoch": 0.009592019439826065,
      "grad_norm": 1.420132040977478,
      "learning_rate": 1.415290652206105e-06,
      "loss": 1.9303,
      "step": 75
    },
    {
      "epoch": 0.009719913032357079,
      "grad_norm": 1.3549017906188965,
      "learning_rate": 1.3153283438175036e-06,
      "loss": 1.97,
      "step": 76
    },
    {
      "epoch": 0.009847806624888093,
      "grad_norm": 1.558439016342163,
      "learning_rate": 1.217751806485235e-06,
      "loss": 2.4078,
      "step": 77
    },
    {
      "epoch": 0.009975700217419108,
      "grad_norm": 1.561328649520874,
      "learning_rate": 1.122757546369744e-06,
      "loss": 2.0117,
      "step": 78
    },
    {
      "epoch": 0.010103593809950122,
      "grad_norm": 1.3918482065200806,
      "learning_rate": 1.0305368692688175e-06,
      "loss": 2.0692,
      "step": 79
    },
    {
      "epoch": 0.010231487402481136,
      "grad_norm": 1.4925870895385742,
      "learning_rate": 9.412754953531664e-07,
      "loss": 2.1758,
      "step": 80
    },
    {
      "epoch": 0.01035938099501215,
      "grad_norm": 1.8017629384994507,
      "learning_rate": 8.551531851507186e-07,
      "loss": 2.1219,
      "step": 81
    },
    {
      "epoch": 0.010487274587543165,
      "grad_norm": 1.533422827720642,
      "learning_rate": 7.723433775328385e-07,
      "loss": 2.0952,
      "step": 82
    },
    {
      "epoch": 0.010615168180074179,
      "grad_norm": 1.8227595090866089,
      "learning_rate": 6.930128404315214e-07,
      "loss": 2.258,
      "step": 83
    },
    {
      "epoch": 0.010743061772605193,
      "grad_norm": 1.8445152044296265,
      "learning_rate": 6.17321334990973e-07,
      "loss": 2.3289,
      "step": 84
    },
    {
      "epoch": 0.010870955365136206,
      "grad_norm": 1.7444809675216675,
      "learning_rate": 5.454212938299256e-07,
      "loss": 2.2124,
      "step": 85
    },
    {
      "epoch": 0.01099884895766722,
      "grad_norm": 1.7502052783966064,
      "learning_rate": 4.774575140626317e-07,
      "loss": 2.2203,
      "step": 86
    },
    {
      "epoch": 0.011126742550198234,
      "grad_norm": 1.734458327293396,
      "learning_rate": 4.1356686569674344e-07,
      "loss": 2.5345,
      "step": 87
    },
    {
      "epoch": 0.011254636142729249,
      "grad_norm": 2.3257081508636475,
      "learning_rate": 3.538780159953348e-07,
      "loss": 2.403,
      "step": 88
    },
    {
      "epoch": 0.011382529735260263,
      "grad_norm": 1.9918384552001953,
      "learning_rate": 2.98511170358155e-07,
      "loss": 2.5831,
      "step": 89
    },
    {
      "epoch": 0.011510423327791277,
      "grad_norm": 2.127302885055542,
      "learning_rate": 2.4757783024395244e-07,
      "loss": 2.9882,
      "step": 90
    },
    {
      "epoch": 0.011638316920322292,
      "grad_norm": 2.2728164196014404,
      "learning_rate": 2.0118056862137358e-07,
      "loss": 2.6982,
      "step": 91
    },
    {
      "epoch": 0.011766210512853306,
      "grad_norm": 2.0041537284851074,
      "learning_rate": 1.59412823400657e-07,
      "loss": 2.5671,
      "step": 92
    },
    {
      "epoch": 0.01189410410538432,
      "grad_norm": 2.0824832916259766,
      "learning_rate": 1.223587092621162e-07,
      "loss": 2.5581,
      "step": 93
    },
    {
      "epoch": 0.012021997697915334,
      "grad_norm": 2.170213460922241,
      "learning_rate": 9.00928482603669e-08,
      "loss": 2.4461,
      "step": 94
    },
    {
      "epoch": 0.012149891290446349,
      "grad_norm": 2.363558769226074,
      "learning_rate": 6.268021954544095e-08,
      "loss": 2.8194,
      "step": 95
    },
    {
      "epoch": 0.012277784882977363,
      "grad_norm": 2.4575698375701904,
      "learning_rate": 4.017602850342584e-08,
      "loss": 3.1582,
      "step": 96
    },
    {
      "epoch": 0.012405678475508377,
      "grad_norm": 2.772186517715454,
      "learning_rate": 2.262559558016325e-08,
      "loss": 3.2808,
      "step": 97
    },
    {
      "epoch": 0.012533572068039392,
      "grad_norm": 3.1454925537109375,
      "learning_rate": 1.006426501190233e-08,
      "loss": 3.2904,
      "step": 98
    },
    {
      "epoch": 0.012661465660570406,
      "grad_norm": 2.797976493835449,
      "learning_rate": 2.5173336467135266e-09,
      "loss": 3.0262,
      "step": 99
    },
    {
      "epoch": 0.01278935925310142,
      "grad_norm": 3.6094884872436523,
      "learning_rate": 0.0,
      "loss": 4.3375,
      "step": 100
    },
    {
      "epoch": 0.01278935925310142,
      "eval_loss": 2.227423667907715,
      "eval_runtime": 141.3088,
      "eval_samples_per_second": 23.296,
      "eval_steps_per_second": 2.916,
      "step": 100
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5971447382016000.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}