{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9981298423724285,
"eval_steps": 200,
"global_step": 467,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0021373230029388193,
"grad_norm": 0.0,
"learning_rate": 0.0,
"logits": -2.7276527881622314,
"logps": -123.19757843017578,
"loss": 0.6931,
"step": 1
},
{
"epoch": 0.010686615014694095,
"grad_norm": 27.89530248586404,
"learning_rate": 1.0638297872340425e-08,
"logits": -2.8715224266052246,
"logps": -234.59034729003906,
"loss": 0.6931,
"step": 5
},
{
"epoch": 0.02137323002938819,
"grad_norm": 24.041320691027245,
"learning_rate": 6.382978723404254e-08,
"logits": -2.8461320400238037,
"logps": -248.1672821044922,
"loss": 0.6928,
"step": 10
},
{
"epoch": 0.03205984504408229,
"grad_norm": 24.00395488621066,
"learning_rate": 1.0638297872340425e-07,
"logits": -2.780062437057495,
"logps": -229.3790740966797,
"loss": 0.683,
"step": 15
},
{
"epoch": 0.04274646005877638,
"grad_norm": 19.287203304415307,
"learning_rate": 1.4893617021276595e-07,
"logits": -2.772031545639038,
"logps": -204.7851104736328,
"loss": 0.6589,
"step": 20
},
{
"epoch": 0.053433075073470476,
"grad_norm": 16.506203912380084,
"learning_rate": 2.0212765957446807e-07,
"logits": -2.9439215660095215,
"logps": -291.8533630371094,
"loss": 0.5956,
"step": 25
},
{
"epoch": 0.06411969008816458,
"grad_norm": 13.779319784342116,
"learning_rate": 2.4468085106382976e-07,
"logits": -2.911271572113037,
"logps": -281.06744384765625,
"loss": 0.5498,
"step": 30
},
{
"epoch": 0.07480630510285867,
"grad_norm": 12.4154908271174,
"learning_rate": 2.978723404255319e-07,
"logits": -2.8849587440490723,
"logps": -235.69473266601562,
"loss": 0.5207,
"step": 35
},
{
"epoch": 0.08549292011755276,
"grad_norm": 13.748382495294905,
"learning_rate": 3.5106382978723405e-07,
"logits": -2.877370595932007,
"logps": -235.3643035888672,
"loss": 0.4912,
"step": 40
},
{
"epoch": 0.09617953513224686,
"grad_norm": 13.504025107902667,
"learning_rate": 4.0425531914893614e-07,
"logits": -2.7878780364990234,
"logps": -260.6932678222656,
"loss": 0.4916,
"step": 45
},
{
"epoch": 0.10686615014694095,
"grad_norm": 14.324493553144778,
"learning_rate": 4.574468085106383e-07,
"logits": -2.603867292404175,
"logps": -273.16552734375,
"loss": 0.4721,
"step": 50
},
{
"epoch": 0.11755276516163506,
"grad_norm": 15.40742501299336,
"learning_rate": 4.999930062653174e-07,
"logits": -2.7843973636627197,
"logps": -288.14190673828125,
"loss": 0.4515,
"step": 55
},
{
"epoch": 0.12823938017632916,
"grad_norm": 14.561448001055401,
"learning_rate": 4.997482666353286e-07,
"logits": -2.647210121154785,
"logps": -257.77447509765625,
"loss": 0.4475,
"step": 60
},
{
"epoch": 0.13892599519102325,
"grad_norm": 15.068250536743896,
"learning_rate": 4.991542314714122e-07,
"logits": -2.5821423530578613,
"logps": -303.4721984863281,
"loss": 0.4282,
"step": 65
},
{
"epoch": 0.14961261020571734,
"grad_norm": 16.720167595176566,
"learning_rate": 4.982117315854593e-07,
"logits": -2.341573476791382,
"logps": -270.63189697265625,
"loss": 0.4396,
"step": 70
},
{
"epoch": 0.16029922522041143,
"grad_norm": 13.024576148545005,
"learning_rate": 4.969220851487844e-07,
"logits": -2.181319236755371,
"logps": -271.0784912109375,
"loss": 0.4102,
"step": 75
},
{
"epoch": 0.17098584023510552,
"grad_norm": 14.533980847784468,
"learning_rate": 4.952870958485431e-07,
"logits": -2.5626049041748047,
"logps": -315.2617492675781,
"loss": 0.4169,
"step": 80
},
{
"epoch": 0.18167245524979964,
"grad_norm": 20.372388284984453,
"learning_rate": 4.933090503651128e-07,
"logits": -2.1712753772735596,
"logps": -308.96380615234375,
"loss": 0.4085,
"step": 85
},
{
"epoch": 0.19235907026449373,
"grad_norm": 16.99275205662673,
"learning_rate": 4.909907151739633e-07,
"logits": -2.3788561820983887,
"logps": -242.3260498046875,
"loss": 0.4102,
"step": 90
},
{
"epoch": 0.20304568527918782,
"grad_norm": 15.680297362409974,
"learning_rate": 4.883353326764906e-07,
"logits": -2.041024684906006,
"logps": -308.76361083984375,
"loss": 0.4059,
"step": 95
},
{
"epoch": 0.2137323002938819,
"grad_norm": 16.614616612784232,
"learning_rate": 4.853466166652258e-07,
"logits": -2.2316627502441406,
"logps": -250.2170867919922,
"loss": 0.3966,
"step": 100
},
{
"epoch": 0.224418915308576,
"grad_norm": 15.24947145329996,
"learning_rate": 4.820287471297597e-07,
"logits": -2.2991251945495605,
"logps": -299.30413818359375,
"loss": 0.3826,
"step": 105
},
{
"epoch": 0.2351055303232701,
"grad_norm": 13.727683778137456,
"learning_rate": 4.783863644106502e-07,
"logits": -2.3693361282348633,
"logps": -316.0078125,
"loss": 0.3928,
"step": 110
},
{
"epoch": 0.2457921453379642,
"grad_norm": 14.648764803231707,
"learning_rate": 4.744245627094858e-07,
"logits": -2.2451415061950684,
"logps": -289.3954772949219,
"loss": 0.4144,
"step": 115
},
{
"epoch": 0.2564787603526583,
"grad_norm": 14.654511458237025,
"learning_rate": 4.7014888296418447e-07,
"logits": -2.1494853496551514,
"logps": -273.82159423828125,
"loss": 0.4048,
"step": 120
},
{
"epoch": 0.2671653753673524,
"grad_norm": 14.029747414365776,
"learning_rate": 4.655653050994906e-07,
"logits": -2.2021608352661133,
"logps": -295.5478515625,
"loss": 0.3795,
"step": 125
},
{
"epoch": 0.2778519903820465,
"grad_norm": 14.629974380430687,
"learning_rate": 4.606802396635098e-07,
"logits": -2.278817653656006,
"logps": -288.4320983886719,
"loss": 0.4076,
"step": 130
},
{
"epoch": 0.2885386053967406,
"grad_norm": 15.667008536793405,
"learning_rate": 4.555005188619775e-07,
"logits": -2.370594024658203,
"logps": -278.10565185546875,
"loss": 0.3922,
"step": 135
},
{
"epoch": 0.2992252204114347,
"grad_norm": 16.537626095297334,
"learning_rate": 4.500333870028016e-07,
"logits": -2.296696424484253,
"logps": -314.9455871582031,
"loss": 0.3805,
"step": 140
},
{
"epoch": 0.30991183542612877,
"grad_norm": 14.136931000002011,
"learning_rate": 4.442864903642427e-07,
"logits": -1.9875481128692627,
"logps": -321.88336181640625,
"loss": 0.3807,
"step": 145
},
{
"epoch": 0.32059845044082286,
"grad_norm": 17.66764816927005,
"learning_rate": 4.3826786650090273e-07,
"logits": -2.300191879272461,
"logps": -288.90155029296875,
"loss": 0.3962,
"step": 150
},
{
"epoch": 0.33128506545551695,
"grad_norm": 13.966542226658245,
"learning_rate": 4.319859330024777e-07,
"logits": -2.366628408432007,
"logps": -320.36199951171875,
"loss": 0.3882,
"step": 155
},
{
"epoch": 0.34197168047021104,
"grad_norm": 15.005833930316435,
"learning_rate": 4.254494757209979e-07,
"logits": -2.2027194499969482,
"logps": -344.4361267089844,
"loss": 0.3849,
"step": 160
},
{
"epoch": 0.3526582954849052,
"grad_norm": 18.876637586071404,
"learning_rate": 4.186676364830186e-07,
"logits": -2.357083559036255,
"logps": -315.52972412109375,
"loss": 0.3865,
"step": 165
},
{
"epoch": 0.36334491049959927,
"grad_norm": 15.883791751426108,
"learning_rate": 4.1164990030394985e-07,
"logits": -2.214961051940918,
"logps": -286.7484130859375,
"loss": 0.383,
"step": 170
},
{
"epoch": 0.37403152551429336,
"grad_norm": 15.570319458411793,
"learning_rate": 4.0440608212240445e-07,
"logits": -2.1734325885772705,
"logps": -290.1646728515625,
"loss": 0.3771,
"step": 175
},
{
"epoch": 0.38471814052898745,
"grad_norm": 16.13327248480777,
"learning_rate": 3.9694631307311825e-07,
"logits": -2.2254865169525146,
"logps": -303.46368408203125,
"loss": 0.3899,
"step": 180
},
{
"epoch": 0.39540475554368154,
"grad_norm": 13.968633194350911,
"learning_rate": 3.8928102631764304e-07,
"logits": -2.254255771636963,
"logps": -246.87158203125,
"loss": 0.3705,
"step": 185
},
{
"epoch": 0.40609137055837563,
"grad_norm": 16.518660551756426,
"learning_rate": 3.8142094245262615e-07,
"logits": -2.283003568649292,
"logps": -298.9637451171875,
"loss": 0.3755,
"step": 190
},
{
"epoch": 0.4167779855730697,
"grad_norm": 17.34857995588876,
"learning_rate": 3.7337705451608667e-07,
"logits": -2.154602289199829,
"logps": -354.7010192871094,
"loss": 0.3748,
"step": 195
},
{
"epoch": 0.4274646005877638,
"grad_norm": 15.365894431623499,
"learning_rate": 3.6516061261265805e-07,
"logits": -2.255361795425415,
"logps": -273.0242004394531,
"loss": 0.3733,
"step": 200
},
{
"epoch": 0.4274646005877638,
"eval_logits": -2.2792365550994873,
"eval_logps": -309.9268493652344,
"eval_loss": 0.3719645142555237,
"eval_runtime": 491.0681,
"eval_samples_per_second": 4.008,
"eval_steps_per_second": 0.25,
"step": 200
},
{
"epoch": 0.4381512156024579,
"grad_norm": 15.070320151495675,
"learning_rate": 3.567831081792992e-07,
"logits": -2.1955361366271973,
"logps": -286.1488037109375,
"loss": 0.3799,
"step": 205
},
{
"epoch": 0.448837830617152,
"grad_norm": 14.900452812607735,
"learning_rate": 3.482562579134809e-07,
"logits": -2.0645949840545654,
"logps": -300.73236083984375,
"loss": 0.3727,
"step": 210
},
{
"epoch": 0.45952444563184613,
"grad_norm": 16.05224888818079,
"learning_rate": 3.39591987386325e-07,
"logits": -1.9900414943695068,
"logps": -282.0771484375,
"loss": 0.3582,
"step": 215
},
{
"epoch": 0.4702110606465402,
"grad_norm": 15.03367961373721,
"learning_rate": 3.30802414363615e-07,
"logits": -1.9547094106674194,
"logps": -248.65402221679688,
"loss": 0.3634,
"step": 220
},
{
"epoch": 0.4808976756612343,
"grad_norm": 18.773649100686807,
"learning_rate": 3.218998318580043e-07,
"logits": -2.17350172996521,
"logps": -326.99517822265625,
"loss": 0.367,
"step": 225
},
{
"epoch": 0.4915842906759284,
"grad_norm": 14.562679231504998,
"learning_rate": 3.128966909361271e-07,
"logits": -2.153386116027832,
"logps": -335.359375,
"loss": 0.3683,
"step": 230
},
{
"epoch": 0.5022709056906225,
"grad_norm": 13.853727892905015,
"learning_rate": 3.038055833046555e-07,
"logits": -2.035804510116577,
"logps": -272.8970642089844,
"loss": 0.3716,
"step": 235
},
{
"epoch": 0.5129575207053166,
"grad_norm": 15.090711014568011,
"learning_rate": 2.9463922369965915e-07,
"logits": -1.9920990467071533,
"logps": -318.5932922363281,
"loss": 0.3686,
"step": 240
},
{
"epoch": 0.5236441357200107,
"grad_norm": 16.18137909043194,
"learning_rate": 2.8541043210389726e-07,
"logits": -2.217284679412842,
"logps": -294.2337341308594,
"loss": 0.3545,
"step": 245
},
{
"epoch": 0.5343307507347048,
"grad_norm": 14.063753919982574,
"learning_rate": 2.761321158169134e-07,
"logits": -2.3281540870666504,
"logps": -285.443359375,
"loss": 0.3574,
"step": 250
},
{
"epoch": 0.5450173657493989,
"grad_norm": 17.098897367043495,
"learning_rate": 2.6681725140300995e-07,
"logits": -1.7651288509368896,
"logps": -297.5621032714844,
"loss": 0.3564,
"step": 255
},
{
"epoch": 0.555703980764093,
"grad_norm": 16.412107745592355,
"learning_rate": 2.574788665423496e-07,
"logits": -1.856030821800232,
"logps": -297.8916320800781,
"loss": 0.3588,
"step": 260
},
{
"epoch": 0.566390595778787,
"grad_norm": 14.888086982411561,
"learning_rate": 2.4813002181056676e-07,
"logits": -2.086013078689575,
"logps": -289.2059020996094,
"loss": 0.3562,
"step": 265
},
{
"epoch": 0.5770772107934812,
"grad_norm": 20.34056135034251,
"learning_rate": 2.3878379241237134e-07,
"logits": -1.7992274761199951,
"logps": -286.0703125,
"loss": 0.3674,
"step": 270
},
{
"epoch": 0.5877638258081752,
"grad_norm": 16.00937961787345,
"learning_rate": 2.2945324989469243e-07,
"logits": -2.1212961673736572,
"logps": -294.78125,
"loss": 0.3583,
"step": 275
},
{
"epoch": 0.5984504408228694,
"grad_norm": 15.0543607024416,
"learning_rate": 2.2015144386493895e-07,
"logits": -1.5599911212921143,
"logps": -331.1915588378906,
"loss": 0.3612,
"step": 280
},
{
"epoch": 0.6091370558375635,
"grad_norm": 15.738762756418016,
"learning_rate": 2.1089138373994222e-07,
"logits": -1.6524254083633423,
"logps": -275.34027099609375,
"loss": 0.3517,
"step": 285
},
{
"epoch": 0.6198236708522575,
"grad_norm": 14.233606209222401,
"learning_rate": 2.0168602055111173e-07,
"logits": -1.846451997756958,
"logps": -323.7337341308594,
"loss": 0.3594,
"step": 290
},
{
"epoch": 0.6305102858669517,
"grad_norm": 14.831569367257195,
"learning_rate": 1.9254822883124517e-07,
"logits": -1.5174415111541748,
"logps": -268.7288818359375,
"loss": 0.3556,
"step": 295
},
{
"epoch": 0.6411969008816457,
"grad_norm": 14.671373291294442,
"learning_rate": 1.8349078860833124e-07,
"logits": -1.7903592586517334,
"logps": -292.779052734375,
"loss": 0.3559,
"step": 300
},
{
"epoch": 0.6518835158963399,
"grad_norm": 15.705427082152443,
"learning_rate": 1.745263675315245e-07,
"logits": -1.7898918390274048,
"logps": -310.0693664550781,
"loss": 0.3571,
"step": 305
},
{
"epoch": 0.6625701309110339,
"grad_norm": 14.233332865288965,
"learning_rate": 1.656675031542925e-07,
"logits": -1.736101508140564,
"logps": -288.0835266113281,
"loss": 0.3618,
"step": 310
},
{
"epoch": 0.673256745925728,
"grad_norm": 13.101127579355996,
"learning_rate": 1.569265853995137e-07,
"logits": -2.0390021800994873,
"logps": -329.4677429199219,
"loss": 0.3578,
"step": 315
},
{
"epoch": 0.6839433609404221,
"grad_norm": 15.898783980322763,
"learning_rate": 1.4831583923104998e-07,
"logits": -1.9800916910171509,
"logps": -278.5652770996094,
"loss": 0.3391,
"step": 320
},
{
"epoch": 0.6946299759551162,
"grad_norm": 15.179976349180745,
"learning_rate": 1.3984730755602903e-07,
"logits": -2.15975284576416,
"logps": -331.96722412109375,
"loss": 0.3488,
"step": 325
},
{
"epoch": 0.7053165909698104,
"grad_norm": 16.173648063524812,
"learning_rate": 1.3153283438175034e-07,
"logits": -2.1058340072631836,
"logps": -319.34527587890625,
"loss": 0.3568,
"step": 330
},
{
"epoch": 0.7160032059845044,
"grad_norm": 14.495401917181017,
"learning_rate": 1.2338404825076935e-07,
"logits": -1.7633529901504517,
"logps": -351.7260437011719,
"loss": 0.3397,
"step": 335
},
{
"epoch": 0.7266898209991985,
"grad_norm": 14.748613870290693,
"learning_rate": 1.1541234597732947e-07,
"logits": -1.9439738988876343,
"logps": -284.2515563964844,
"loss": 0.3488,
"step": 340
},
{
"epoch": 0.7373764360138926,
"grad_norm": 15.431597691399574,
"learning_rate": 1.0762887670788701e-07,
"logits": -2.0670387744903564,
"logps": -324.03240966796875,
"loss": 0.3568,
"step": 345
},
{
"epoch": 0.7480630510285867,
"grad_norm": 13.916918536725055,
"learning_rate": 1.0004452632802158e-07,
"logits": -1.9829730987548828,
"logps": -283.0121154785156,
"loss": 0.3371,
"step": 350
},
{
"epoch": 0.7587496660432808,
"grad_norm": 14.83244858797638,
"learning_rate": 9.266990223754067e-08,
"logits": -2.1220943927764893,
"logps": -284.113525390625,
"loss": 0.3572,
"step": 355
},
{
"epoch": 0.7694362810579749,
"grad_norm": 13.906877690225956,
"learning_rate": 8.551531851507185e-08,
"logits": -1.8662292957305908,
"logps": -316.2903747558594,
"loss": 0.3534,
"step": 360
},
{
"epoch": 0.7801228960726689,
"grad_norm": 16.021070732056423,
"learning_rate": 7.859078149289144e-08,
"logits": -2.0029776096343994,
"logps": -290.8583068847656,
"loss": 0.3611,
"step": 365
},
{
"epoch": 0.7908095110873631,
"grad_norm": 13.954109703177394,
"learning_rate": 7.190597576216384e-08,
"logits": -1.896113634109497,
"logps": -294.7978210449219,
"loss": 0.349,
"step": 370
},
{
"epoch": 0.8014961261020572,
"grad_norm": 18.103369920066683,
"learning_rate": 6.547025062816486e-08,
"logits": -1.7909294366836548,
"logps": -298.8819885253906,
"loss": 0.3567,
"step": 375
},
{
"epoch": 0.8121827411167513,
"grad_norm": 12.482171705770575,
"learning_rate": 5.929260703443337e-08,
"logits": -1.713022232055664,
"logps": -309.056396484375,
"loss": 0.3468,
"step": 380
},
{
"epoch": 0.8228693561314454,
"grad_norm": 15.966406254185237,
"learning_rate": 5.338168497413756e-08,
"logits": -1.485386610031128,
"logps": -301.24560546875,
"loss": 0.3568,
"step": 385
},
{
"epoch": 0.8335559711461394,
"grad_norm": 14.935885056404505,
"learning_rate": 4.774575140626316e-08,
"logits": -1.68508780002594,
"logps": -302.7105407714844,
"loss": 0.3465,
"step": 390
},
{
"epoch": 0.8442425861608336,
"grad_norm": 14.494342289383189,
"learning_rate": 4.2392688693524055e-08,
"logits": -1.780106782913208,
"logps": -281.1724548339844,
"loss": 0.3577,
"step": 395
},
{
"epoch": 0.8549292011755276,
"grad_norm": 14.966163192699174,
"learning_rate": 3.732998357816514e-08,
"logits": -1.9449115991592407,
"logps": -308.26251220703125,
"loss": 0.355,
"step": 400
},
{
"epoch": 0.8549292011755276,
"eval_logits": -1.9703269004821777,
"eval_logps": -324.5088806152344,
"eval_loss": 0.3534272313117981,
"eval_runtime": 480.7572,
"eval_samples_per_second": 4.094,
"eval_steps_per_second": 0.256,
"step": 400
},
{
"epoch": 0.8656158161902218,
"grad_norm": 14.52513199126684,
"learning_rate": 3.256471671107616e-08,
"logits": -1.9270665645599365,
"logps": -320.7890319824219,
"loss": 0.357,
"step": 405
},
{
"epoch": 0.8763024312049158,
"grad_norm": 13.984563355291044,
"learning_rate": 2.8103552748861475e-08,
"logits": -1.7152255773544312,
"logps": -300.7731628417969,
"loss": 0.3598,
"step": 410
},
{
"epoch": 0.88698904621961,
"grad_norm": 14.474719718248345,
"learning_rate": 2.3952731032714973e-08,
"logits": -1.8561521768569946,
"logps": -279.46380615234375,
"loss": 0.3509,
"step": 415
},
{
"epoch": 0.897675661234304,
"grad_norm": 15.20686225812657,
"learning_rate": 2.0118056862137354e-08,
"logits": -2.1437106132507324,
"logps": -296.0022277832031,
"loss": 0.3596,
"step": 420
},
{
"epoch": 0.9083622762489981,
"grad_norm": 13.705499471422334,
"learning_rate": 1.6604893375699592e-08,
"logits": -1.8986858129501343,
"logps": -294.5618896484375,
"loss": 0.3566,
"step": 425
},
{
"epoch": 0.9190488912636923,
"grad_norm": 14.224665908488847,
"learning_rate": 1.3418154050208936e-08,
"logits": -1.9494727849960327,
"logps": -325.150634765625,
"loss": 0.3432,
"step": 430
},
{
"epoch": 0.9297355062783863,
"grad_norm": 13.456954319141683,
"learning_rate": 1.0562295828767387e-08,
"logits": -2.093982458114624,
"logps": -293.69842529296875,
"loss": 0.3565,
"step": 435
},
{
"epoch": 0.9404221212930804,
"grad_norm": 13.391645385265695,
"learning_rate": 8.041312887333396e-09,
"logits": -1.904130220413208,
"logps": -276.39105224609375,
"loss": 0.3404,
"step": 440
},
{
"epoch": 0.9511087363077745,
"grad_norm": 20.71139273996219,
"learning_rate": 5.858731048505927e-09,
"logits": -1.914345383644104,
"logps": -338.41656494140625,
"loss": 0.3573,
"step": 445
},
{
"epoch": 0.9617953513224686,
"grad_norm": 19.97291791251005,
"learning_rate": 4.0176028503425826e-09,
"logits": -1.9096574783325195,
"logps": -301.0379943847656,
"loss": 0.355,
"step": 450
},
{
"epoch": 0.9724819663371627,
"grad_norm": 14.314824993795222,
"learning_rate": 2.5205032771092592e-09,
"logits": -1.702121376991272,
"logps": -298.85516357421875,
"loss": 0.3377,
"step": 455
},
{
"epoch": 0.9831685813518568,
"grad_norm": 13.935952369098977,
"learning_rate": 1.3695261579316775e-09,
"logits": -1.7297840118408203,
"logps": -259.74139404296875,
"loss": 0.3506,
"step": 460
},
{
"epoch": 0.9938551963665508,
"grad_norm": 16.215817529965406,
"learning_rate": 5.662812383859794e-10,
"logits": -1.9168570041656494,
"logps": -299.5458068847656,
"loss": 0.3554,
"step": 465
},
{
"epoch": 0.9981298423724285,
"step": 467,
"total_flos": 0.0,
"train_loss": 0.3941320441264412,
"train_runtime": 39337.3566,
"train_samples_per_second": 1.522,
"train_steps_per_second": 0.012
}
],
"logging_steps": 5,
"max_steps": 467,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 125,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}