File size: 10,783 Bytes
49a4812
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
{
  "best_metric": 0.5314118934504415,
  "best_model_checkpoint": "./results/checkpoint-15120",
  "epoch": 18.0,
  "eval_steps": 500,
  "global_step": 15120,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.6,
      "learning_rate": 2.9761904761904762e-05,
      "loss": 7.0613,
      "step": 500
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.2605280876159668,
      "eval_rouge1": 0.5053086747478719,
      "eval_rouge2": 0.24615333793097546,
      "eval_rougeL": 0.4512970323707136,
      "eval_rougeLsum": 0.4656617846478366,
      "eval_runtime": 772.9001,
      "eval_samples_per_second": 1.938,
      "eval_steps_per_second": 0.122,
      "step": 840
    },
    {
      "epoch": 1.19,
      "learning_rate": 4.9498746867167923e-05,
      "loss": 1.4331,
      "step": 1000
    },
    {
      "epoch": 1.79,
      "learning_rate": 4.793233082706767e-05,
      "loss": 1.3665,
      "step": 1500
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.2270965576171875,
      "eval_rouge1": 0.5124476660885933,
      "eval_rouge2": 0.256176861062982,
      "eval_rougeL": 0.4600482066009496,
      "eval_rougeLsum": 0.4746519151030747,
      "eval_runtime": 1039.1433,
      "eval_samples_per_second": 1.442,
      "eval_steps_per_second": 0.09,
      "step": 1680
    },
    {
      "epoch": 2.38,
      "learning_rate": 4.6365914786967416e-05,
      "loss": 1.3381,
      "step": 2000
    },
    {
      "epoch": 2.98,
      "learning_rate": 4.4799498746867166e-05,
      "loss": 1.3009,
      "step": 2500
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.2114347219467163,
      "eval_rouge1": 0.5179075668199837,
      "eval_rouge2": 0.2612977945518593,
      "eval_rougeL": 0.46524322885156827,
      "eval_rougeLsum": 0.4802301912620997,
      "eval_runtime": 1018.7441,
      "eval_samples_per_second": 1.47,
      "eval_steps_per_second": 0.092,
      "step": 2520
    },
    {
      "epoch": 3.57,
      "learning_rate": 4.323308270676692e-05,
      "loss": 1.2594,
      "step": 3000
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.203908085823059,
      "eval_rouge1": 0.5229139335151443,
      "eval_rouge2": 0.26558551302892763,
      "eval_rougeL": 0.46774305498128443,
      "eval_rougeLsum": 0.483920816134904,
      "eval_runtime": 926.8973,
      "eval_samples_per_second": 1.616,
      "eval_steps_per_second": 0.101,
      "step": 3360
    },
    {
      "epoch": 4.17,
      "learning_rate": 4.166666666666667e-05,
      "loss": 1.247,
      "step": 3500
    },
    {
      "epoch": 4.76,
      "learning_rate": 4.0100250626566415e-05,
      "loss": 1.227,
      "step": 4000
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.1987696886062622,
      "eval_rouge1": 0.5247159372903829,
      "eval_rouge2": 0.2668774790757894,
      "eval_rougeL": 0.4698131046976711,
      "eval_rougeLsum": 0.4859852789981728,
      "eval_runtime": 1138.215,
      "eval_samples_per_second": 1.316,
      "eval_steps_per_second": 0.083,
      "step": 4200
    },
    {
      "epoch": 5.36,
      "learning_rate": 3.8533834586466165e-05,
      "loss": 1.1951,
      "step": 4500
    },
    {
      "epoch": 5.95,
      "learning_rate": 3.6967418546365914e-05,
      "loss": 1.1918,
      "step": 5000
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.1965585947036743,
      "eval_rouge1": 0.5238465835630357,
      "eval_rouge2": 0.2697694029030245,
      "eval_rougeL": 0.4700972375246304,
      "eval_rougeLsum": 0.48604440949884575,
      "eval_runtime": 1144.1344,
      "eval_samples_per_second": 1.309,
      "eval_steps_per_second": 0.082,
      "step": 5040
    },
    {
      "epoch": 6.55,
      "learning_rate": 3.540100250626567e-05,
      "loss": 1.1696,
      "step": 5500
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.193439245223999,
      "eval_rouge1": 0.5261420787321425,
      "eval_rouge2": 0.2701869159088741,
      "eval_rougeL": 0.472288301825879,
      "eval_rougeLsum": 0.48796847398006715,
      "eval_runtime": 913.4514,
      "eval_samples_per_second": 1.64,
      "eval_steps_per_second": 0.103,
      "step": 5880
    },
    {
      "epoch": 7.14,
      "learning_rate": 3.3834586466165414e-05,
      "loss": 1.1472,
      "step": 6000
    },
    {
      "epoch": 7.74,
      "learning_rate": 3.2268170426065164e-05,
      "loss": 1.1285,
      "step": 6500
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.1932892799377441,
      "eval_rouge1": 0.5236558096283529,
      "eval_rouge2": 0.26931250040124155,
      "eval_rougeL": 0.4700827158260693,
      "eval_rougeLsum": 0.48489767885463625,
      "eval_runtime": 1155.7348,
      "eval_samples_per_second": 1.296,
      "eval_steps_per_second": 0.081,
      "step": 6720
    },
    {
      "epoch": 8.33,
      "learning_rate": 3.0701754385964913e-05,
      "loss": 1.1234,
      "step": 7000
    },
    {
      "epoch": 8.93,
      "learning_rate": 2.9135338345864667e-05,
      "loss": 1.1153,
      "step": 7500
    },
    {
      "epoch": 9.0,
      "eval_loss": 1.1960569620132446,
      "eval_rouge1": 0.5263030048004651,
      "eval_rouge2": 0.27081863504950743,
      "eval_rougeL": 0.4724221317085258,
      "eval_rougeLsum": 0.48803341513882764,
      "eval_runtime": 983.851,
      "eval_samples_per_second": 1.523,
      "eval_steps_per_second": 0.096,
      "step": 7560
    },
    {
      "epoch": 9.52,
      "learning_rate": 2.756892230576441e-05,
      "loss": 1.0927,
      "step": 8000
    },
    {
      "epoch": 10.0,
      "eval_loss": 1.1961216926574707,
      "eval_rouge1": 0.5253667627324876,
      "eval_rouge2": 0.2690944896569006,
      "eval_rougeL": 0.4720451976404024,
      "eval_rougeLsum": 0.48738350139979847,
      "eval_runtime": 932.6258,
      "eval_samples_per_second": 1.606,
      "eval_steps_per_second": 0.101,
      "step": 8400
    },
    {
      "epoch": 10.12,
      "learning_rate": 2.6002506265664163e-05,
      "loss": 1.0933,
      "step": 8500
    },
    {
      "epoch": 10.71,
      "learning_rate": 2.443609022556391e-05,
      "loss": 1.0661,
      "step": 9000
    },
    {
      "epoch": 11.0,
      "eval_loss": 1.2010161876678467,
      "eval_rouge1": 0.5234197228179973,
      "eval_rouge2": 0.2683998445240433,
      "eval_rougeL": 0.4697712822647654,
      "eval_rougeLsum": 0.48542105989790263,
      "eval_runtime": 1250.3048,
      "eval_samples_per_second": 1.198,
      "eval_steps_per_second": 0.075,
      "step": 9240
    },
    {
      "epoch": 11.31,
      "learning_rate": 2.2869674185463662e-05,
      "loss": 1.0663,
      "step": 9500
    },
    {
      "epoch": 11.9,
      "learning_rate": 2.130325814536341e-05,
      "loss": 1.0634,
      "step": 10000
    },
    {
      "epoch": 12.0,
      "eval_loss": 1.2002513408660889,
      "eval_rouge1": 0.5259472773406924,
      "eval_rouge2": 0.2723400096809272,
      "eval_rougeL": 0.47293541812561896,
      "eval_rougeLsum": 0.4885362784921822,
      "eval_runtime": 1027.3406,
      "eval_samples_per_second": 1.458,
      "eval_steps_per_second": 0.091,
      "step": 10080
    },
    {
      "epoch": 12.5,
      "learning_rate": 1.9736842105263158e-05,
      "loss": 1.046,
      "step": 10500
    },
    {
      "epoch": 13.0,
      "eval_loss": 1.2019047737121582,
      "eval_rouge1": 0.527682777778989,
      "eval_rouge2": 0.27261485269714236,
      "eval_rougeL": 0.4747461246549739,
      "eval_rougeLsum": 0.49067235332780945,
      "eval_runtime": 1023.6212,
      "eval_samples_per_second": 1.463,
      "eval_steps_per_second": 0.092,
      "step": 10920
    },
    {
      "epoch": 13.1,
      "learning_rate": 1.8170426065162908e-05,
      "loss": 1.0454,
      "step": 11000
    },
    {
      "epoch": 13.69,
      "learning_rate": 1.6604010025062658e-05,
      "loss": 1.0273,
      "step": 11500
    },
    {
      "epoch": 14.0,
      "eval_loss": 1.2045047283172607,
      "eval_rouge1": 0.5309321460587708,
      "eval_rouge2": 0.27493073812627356,
      "eval_rougeL": 0.4776371159725116,
      "eval_rougeLsum": 0.49401653255083877,
      "eval_runtime": 933.4164,
      "eval_samples_per_second": 1.605,
      "eval_steps_per_second": 0.101,
      "step": 11760
    },
    {
      "epoch": 14.29,
      "learning_rate": 1.5037593984962406e-05,
      "loss": 1.0257,
      "step": 12000
    },
    {
      "epoch": 14.88,
      "learning_rate": 1.3471177944862157e-05,
      "loss": 1.0218,
      "step": 12500
    },
    {
      "epoch": 15.0,
      "eval_loss": 1.207719326019287,
      "eval_rouge1": 0.5295059932432493,
      "eval_rouge2": 0.2727809059549531,
      "eval_rougeL": 0.4770545143695025,
      "eval_rougeLsum": 0.49245995561354516,
      "eval_runtime": 874.8747,
      "eval_samples_per_second": 1.712,
      "eval_steps_per_second": 0.107,
      "step": 12600
    },
    {
      "epoch": 15.48,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 1.0208,
      "step": 13000
    },
    {
      "epoch": 16.0,
      "eval_loss": 1.2094707489013672,
      "eval_rouge1": 0.5303496124431821,
      "eval_rouge2": 0.27278313644220603,
      "eval_rougeL": 0.47754216166274177,
      "eval_rougeLsum": 0.49284317669377947,
      "eval_runtime": 939.0978,
      "eval_samples_per_second": 1.595,
      "eval_steps_per_second": 0.1,
      "step": 13440
    },
    {
      "epoch": 16.07,
      "learning_rate": 1.0338345864661655e-05,
      "loss": 1.0093,
      "step": 13500
    },
    {
      "epoch": 16.67,
      "learning_rate": 8.771929824561403e-06,
      "loss": 1.003,
      "step": 14000
    },
    {
      "epoch": 17.0,
      "eval_loss": 1.2109801769256592,
      "eval_rouge1": 0.5301463905288382,
      "eval_rouge2": 0.2726100969561255,
      "eval_rougeL": 0.47718245520328006,
      "eval_rougeLsum": 0.49294754454933043,
      "eval_runtime": 829.8724,
      "eval_samples_per_second": 1.805,
      "eval_steps_per_second": 0.113,
      "step": 14280
    },
    {
      "epoch": 17.26,
      "learning_rate": 7.205513784461153e-06,
      "loss": 1.0002,
      "step": 14500
    },
    {
      "epoch": 17.86,
      "learning_rate": 5.639097744360902e-06,
      "loss": 1.003,
      "step": 15000
    },
    {
      "epoch": 18.0,
      "eval_loss": 1.209855556488037,
      "eval_rouge1": 0.5314118934504415,
      "eval_rouge2": 0.27293095312101445,
      "eval_rougeL": 0.47805026323896327,
      "eval_rougeLsum": 0.49408516728917673,
      "eval_runtime": 882.3325,
      "eval_samples_per_second": 1.698,
      "eval_steps_per_second": 0.107,
      "step": 15120
    }
  ],
  "logging_steps": 500,
  "max_steps": 16800,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 3.682281994080768e+16,
  "trial_name": null,
  "trial_params": null
}