yexter committed on
Commit 30c4792 · verified · 1 Parent(s): 798e6ee

End of training

Files changed (3)
  1. all_results.json +8 -0
  2. test_results.json +8 -0
  3. trainer_state.json +959 -0
all_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 4.196734693877551,
+ "eval_accuracy": 0.8441558441558441,
+ "eval_loss": 0.5236971378326416,
+ "eval_runtime": 632.6262,
+ "eval_samples_per_second": 2.313,
+ "eval_steps_per_second": 0.096
+ }
test_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 4.196734693877551,
+ "eval_accuracy": 0.8441558441558441,
+ "eval_loss": 0.5236971378326416,
+ "eval_runtime": 632.6262,
+ "eval_samples_per_second": 2.313,
+ "eval_steps_per_second": 0.096
+ }
trainer_state.json ADDED
@@ -0,0 +1,959 @@
+ {
+ "best_metric": 0.7619883040935672,
+ "best_model_checkpoint": "videomae-base-Badminton_strokes-finetuned-stroke-classification\\checkpoint-1225",
+ "epoch": 4.196734693877551,
+ "eval_steps": 500,
+ "global_step": 1225,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.00816326530612245,
+ "grad_norm": 5.244400501251221,
+ "learning_rate": 4.0650406504065046e-06,
+ "loss": 2.4952,
+ "step": 10
+ },
+ {
+ "epoch": 0.0163265306122449,
+ "grad_norm": 6.468714714050293,
+ "learning_rate": 8.130081300813009e-06,
+ "loss": 2.3526,
+ "step": 20
+ },
+ {
+ "epoch": 0.024489795918367346,
+ "grad_norm": 4.259028434753418,
+ "learning_rate": 1.2195121951219513e-05,
+ "loss": 2.1971,
+ "step": 30
+ },
+ {
+ "epoch": 0.0326530612244898,
+ "grad_norm": 4.142731666564941,
+ "learning_rate": 1.6260162601626018e-05,
+ "loss": 2.1394,
+ "step": 40
+ },
+ {
+ "epoch": 0.04081632653061224,
+ "grad_norm": 4.3925089836120605,
+ "learning_rate": 2.032520325203252e-05,
+ "loss": 2.1323,
+ "step": 50
+ },
+ {
+ "epoch": 0.04897959183673469,
+ "grad_norm": 4.224646091461182,
+ "learning_rate": 2.4390243902439026e-05,
+ "loss": 2.0748,
+ "step": 60
+ },
+ {
+ "epoch": 0.05714285714285714,
+ "grad_norm": 5.832901954650879,
+ "learning_rate": 2.8455284552845528e-05,
+ "loss": 2.0847,
+ "step": 70
+ },
+ {
+ "epoch": 0.0653061224489796,
+ "grad_norm": 4.4013671875,
+ "learning_rate": 3.2520325203252037e-05,
+ "loss": 2.0667,
+ "step": 80
+ },
+ {
+ "epoch": 0.07346938775510205,
+ "grad_norm": 4.626805305480957,
+ "learning_rate": 3.6585365853658535e-05,
+ "loss": 1.9221,
+ "step": 90
+ },
+ {
+ "epoch": 0.08163265306122448,
+ "grad_norm": 7.152433395385742,
+ "learning_rate": 4.065040650406504e-05,
+ "loss": 1.8691,
+ "step": 100
+ },
+ {
+ "epoch": 0.08979591836734693,
+ "grad_norm": 4.98500394821167,
+ "learning_rate": 4.4715447154471546e-05,
+ "loss": 1.7676,
+ "step": 110
+ },
+ {
+ "epoch": 0.09795918367346938,
+ "grad_norm": 12.184016227722168,
+ "learning_rate": 4.878048780487805e-05,
+ "loss": 1.7009,
+ "step": 120
+ },
+ {
+ "epoch": 0.10612244897959183,
+ "grad_norm": 5.2860894203186035,
+ "learning_rate": 4.9682395644283126e-05,
+ "loss": 1.5885,
+ "step": 130
+ },
+ {
+ "epoch": 0.11428571428571428,
+ "grad_norm": 7.862862586975098,
+ "learning_rate": 4.922867513611615e-05,
+ "loss": 1.5489,
+ "step": 140
+ },
+ {
+ "epoch": 0.12244897959183673,
+ "grad_norm": 5.597222328186035,
+ "learning_rate": 4.877495462794919e-05,
+ "loss": 1.5169,
+ "step": 150
+ },
+ {
+ "epoch": 0.1306122448979592,
+ "grad_norm": 5.168361186981201,
+ "learning_rate": 4.8321234119782216e-05,
+ "loss": 1.429,
+ "step": 160
+ },
+ {
+ "epoch": 0.13877551020408163,
+ "grad_norm": 4.200538158416748,
+ "learning_rate": 4.786751361161525e-05,
+ "loss": 1.4782,
+ "step": 170
+ },
+ {
+ "epoch": 0.1469387755102041,
+ "grad_norm": 6.290266513824463,
+ "learning_rate": 4.741379310344828e-05,
+ "loss": 1.4558,
+ "step": 180
+ },
+ {
+ "epoch": 0.15510204081632653,
+ "grad_norm": 5.306058406829834,
+ "learning_rate": 4.6960072595281306e-05,
+ "loss": 1.3866,
+ "step": 190
+ },
+ {
+ "epoch": 0.16326530612244897,
+ "grad_norm": 4.555312156677246,
+ "learning_rate": 4.650635208711434e-05,
+ "loss": 1.3229,
+ "step": 200
+ },
+ {
+ "epoch": 0.17142857142857143,
+ "grad_norm": 6.407901763916016,
+ "learning_rate": 4.605263157894737e-05,
+ "loss": 1.3421,
+ "step": 210
+ },
+ {
+ "epoch": 0.17959183673469387,
+ "grad_norm": 5.324537754058838,
+ "learning_rate": 4.55989110707804e-05,
+ "loss": 1.2384,
+ "step": 220
+ },
+ {
+ "epoch": 0.18775510204081633,
+ "grad_norm": 5.129929542541504,
+ "learning_rate": 4.5145190562613434e-05,
+ "loss": 1.2475,
+ "step": 230
+ },
+ {
+ "epoch": 0.19591836734693877,
+ "grad_norm": 7.351629257202148,
+ "learning_rate": 4.469147005444646e-05,
+ "loss": 1.2138,
+ "step": 240
+ },
+ {
+ "epoch": 0.20081632653061224,
+ "eval_accuracy": 0.512280701754386,
+ "eval_loss": 1.4943147897720337,
+ "eval_runtime": 765.4217,
+ "eval_samples_per_second": 2.234,
+ "eval_steps_per_second": 0.094,
+ "step": 246
+ },
+ {
+ "epoch": 1.003265306122449,
+ "grad_norm": 3.978257179260254,
+ "learning_rate": 4.423774954627949e-05,
+ "loss": 1.1764,
+ "step": 250
+ },
+ {
+ "epoch": 1.0114285714285713,
+ "grad_norm": 4.829442977905273,
+ "learning_rate": 4.3784029038112524e-05,
+ "loss": 1.0118,
+ "step": 260
+ },
+ {
+ "epoch": 1.019591836734694,
+ "grad_norm": 5.639930725097656,
+ "learning_rate": 4.3330308529945556e-05,
+ "loss": 1.2025,
+ "step": 270
+ },
+ {
+ "epoch": 1.0277551020408162,
+ "grad_norm": 4.805947780609131,
+ "learning_rate": 4.287658802177859e-05,
+ "loss": 1.0476,
+ "step": 280
+ },
+ {
+ "epoch": 1.0359183673469388,
+ "grad_norm": 4.968803882598877,
+ "learning_rate": 4.242286751361162e-05,
+ "loss": 1.0606,
+ "step": 290
+ },
+ {
+ "epoch": 1.0440816326530613,
+ "grad_norm": 7.550710201263428,
+ "learning_rate": 4.1969147005444646e-05,
+ "loss": 1.1504,
+ "step": 300
+ },
+ {
+ "epoch": 1.0522448979591836,
+ "grad_norm": 9.26010799407959,
+ "learning_rate": 4.151542649727768e-05,
+ "loss": 1.1346,
+ "step": 310
+ },
+ {
+ "epoch": 1.0604081632653062,
+ "grad_norm": 12.30020523071289,
+ "learning_rate": 4.106170598911071e-05,
+ "loss": 0.9971,
+ "step": 320
+ },
+ {
+ "epoch": 1.0685714285714285,
+ "grad_norm": 9.262853622436523,
+ "learning_rate": 4.0607985480943736e-05,
+ "loss": 1.0591,
+ "step": 330
+ },
+ {
+ "epoch": 1.076734693877551,
+ "grad_norm": 9.531743049621582,
+ "learning_rate": 4.0154264972776775e-05,
+ "loss": 0.9533,
+ "step": 340
+ },
+ {
+ "epoch": 1.0848979591836734,
+ "grad_norm": 11.760782241821289,
+ "learning_rate": 3.97005444646098e-05,
+ "loss": 1.0115,
+ "step": 350
+ },
+ {
+ "epoch": 1.093061224489796,
+ "grad_norm": 8.676046371459961,
+ "learning_rate": 3.924682395644283e-05,
+ "loss": 1.0866,
+ "step": 360
+ },
+ {
+ "epoch": 1.1012244897959185,
+ "grad_norm": 6.559369087219238,
+ "learning_rate": 3.8793103448275865e-05,
+ "loss": 0.9825,
+ "step": 370
+ },
+ {
+ "epoch": 1.1093877551020408,
+ "grad_norm": 8.812031745910645,
+ "learning_rate": 3.833938294010889e-05,
+ "loss": 1.121,
+ "step": 380
+ },
+ {
+ "epoch": 1.1175510204081633,
+ "grad_norm": 5.920422077178955,
+ "learning_rate": 3.788566243194193e-05,
+ "loss": 1.0367,
+ "step": 390
+ },
+ {
+ "epoch": 1.1257142857142857,
+ "grad_norm": 6.096667289733887,
+ "learning_rate": 3.7431941923774954e-05,
+ "loss": 0.9247,
+ "step": 400
+ },
+ {
+ "epoch": 1.1338775510204082,
+ "grad_norm": 8.243738174438477,
+ "learning_rate": 3.6978221415607986e-05,
+ "loss": 0.8421,
+ "step": 410
+ },
+ {
+ "epoch": 1.1420408163265305,
+ "grad_norm": 13.763681411743164,
+ "learning_rate": 3.652450090744102e-05,
+ "loss": 0.7692,
+ "step": 420
+ },
+ {
+ "epoch": 1.150204081632653,
+ "grad_norm": 13.45505428314209,
+ "learning_rate": 3.607078039927405e-05,
+ "loss": 0.9248,
+ "step": 430
+ },
+ {
+ "epoch": 1.1583673469387756,
+ "grad_norm": 6.331974506378174,
+ "learning_rate": 3.561705989110708e-05,
+ "loss": 1.011,
+ "step": 440
+ },
+ {
+ "epoch": 1.166530612244898,
+ "grad_norm": 7.715512275695801,
+ "learning_rate": 3.516333938294011e-05,
+ "loss": 0.8162,
+ "step": 450
+ },
+ {
+ "epoch": 1.1746938775510205,
+ "grad_norm": 6.705671787261963,
+ "learning_rate": 3.470961887477314e-05,
+ "loss": 0.8677,
+ "step": 460
+ },
+ {
+ "epoch": 1.1828571428571428,
+ "grad_norm": 11.03935718536377,
+ "learning_rate": 3.425589836660617e-05,
+ "loss": 0.9038,
+ "step": 470
+ },
+ {
+ "epoch": 1.1910204081632654,
+ "grad_norm": 5.334926605224609,
+ "learning_rate": 3.3802177858439205e-05,
+ "loss": 0.8735,
+ "step": 480
+ },
+ {
+ "epoch": 1.1991836734693877,
+ "grad_norm": 9.334040641784668,
+ "learning_rate": 3.334845735027223e-05,
+ "loss": 0.8947,
+ "step": 490
+ },
+ {
+ "epoch": 1.2008163265306122,
+ "eval_accuracy": 0.6771929824561403,
+ "eval_loss": 1.2374348640441895,
+ "eval_runtime": 753.5154,
+ "eval_samples_per_second": 2.269,
+ "eval_steps_per_second": 0.096,
+ "step": 492
+ },
+ {
+ "epoch": 2.006530612244898,
+ "grad_norm": 7.620924472808838,
+ "learning_rate": 3.289473684210527e-05,
+ "loss": 0.7791,
+ "step": 500
+ },
+ {
+ "epoch": 2.0146938775510206,
+ "grad_norm": 5.360889911651611,
+ "learning_rate": 3.2441016333938295e-05,
+ "loss": 0.6921,
+ "step": 510
+ },
+ {
+ "epoch": 2.0228571428571427,
+ "grad_norm": 32.47731018066406,
+ "learning_rate": 3.198729582577133e-05,
+ "loss": 0.5907,
+ "step": 520
+ },
+ {
+ "epoch": 2.0310204081632652,
+ "grad_norm": 6.6628289222717285,
+ "learning_rate": 3.153357531760436e-05,
+ "loss": 0.7626,
+ "step": 530
+ },
+ {
+ "epoch": 2.039183673469388,
+ "grad_norm": 6.246597766876221,
+ "learning_rate": 3.1079854809437384e-05,
+ "loss": 0.731,
+ "step": 540
+ },
+ {
+ "epoch": 2.0473469387755103,
+ "grad_norm": 5.773654937744141,
+ "learning_rate": 3.062613430127042e-05,
+ "loss": 0.7799,
+ "step": 550
+ },
+ {
+ "epoch": 2.0555102040816324,
+ "grad_norm": 8.651352882385254,
+ "learning_rate": 3.017241379310345e-05,
+ "loss": 0.7187,
+ "step": 560
+ },
+ {
+ "epoch": 2.063673469387755,
+ "grad_norm": 8.399853706359863,
+ "learning_rate": 2.9718693284936484e-05,
+ "loss": 0.7076,
+ "step": 570
+ },
+ {
+ "epoch": 2.0718367346938775,
+ "grad_norm": 10.44221305847168,
+ "learning_rate": 2.9264972776769513e-05,
+ "loss": 0.7123,
+ "step": 580
+ },
+ {
+ "epoch": 2.08,
+ "grad_norm": 8.12469482421875,
+ "learning_rate": 2.881125226860254e-05,
+ "loss": 0.6117,
+ "step": 590
+ },
+ {
+ "epoch": 2.0881632653061226,
+ "grad_norm": 12.644967079162598,
+ "learning_rate": 2.8357531760435574e-05,
+ "loss": 0.7665,
+ "step": 600
+ },
+ {
+ "epoch": 2.0963265306122447,
+ "grad_norm": 8.58361530303955,
+ "learning_rate": 2.7903811252268603e-05,
+ "loss": 0.5814,
+ "step": 610
+ },
+ {
+ "epoch": 2.1044897959183673,
+ "grad_norm": 8.686047554016113,
+ "learning_rate": 2.7450090744101635e-05,
+ "loss": 0.879,
+ "step": 620
+ },
+ {
+ "epoch": 2.11265306122449,
+ "grad_norm": 6.443877696990967,
+ "learning_rate": 2.6996370235934664e-05,
+ "loss": 0.661,
+ "step": 630
+ },
+ {
+ "epoch": 2.1208163265306124,
+ "grad_norm": 18.404951095581055,
+ "learning_rate": 2.65426497277677e-05,
+ "loss": 0.7436,
+ "step": 640
+ },
+ {
+ "epoch": 2.1289795918367345,
+ "grad_norm": 7.879423141479492,
+ "learning_rate": 2.6088929219600728e-05,
+ "loss": 0.4849,
+ "step": 650
+ },
+ {
+ "epoch": 2.137142857142857,
+ "grad_norm": 6.912235736846924,
+ "learning_rate": 2.5635208711433757e-05,
+ "loss": 0.6246,
+ "step": 660
+ },
+ {
+ "epoch": 2.1453061224489796,
+ "grad_norm": 10.059391975402832,
+ "learning_rate": 2.518148820326679e-05,
+ "loss": 0.6314,
+ "step": 670
+ },
+ {
+ "epoch": 2.153469387755102,
+ "grad_norm": 8.129505157470703,
+ "learning_rate": 2.472776769509982e-05,
+ "loss": 0.7015,
+ "step": 680
+ },
+ {
+ "epoch": 2.1616326530612247,
+ "grad_norm": 9.07080364227295,
+ "learning_rate": 2.427404718693285e-05,
+ "loss": 0.6986,
+ "step": 690
+ },
+ {
+ "epoch": 2.1697959183673468,
+ "grad_norm": 8.254867553710938,
+ "learning_rate": 2.3820326678765882e-05,
+ "loss": 0.684,
+ "step": 700
+ },
+ {
+ "epoch": 2.1779591836734693,
+ "grad_norm": 8.124053955078125,
+ "learning_rate": 2.336660617059891e-05,
+ "loss": 0.5424,
+ "step": 710
+ },
+ {
+ "epoch": 2.186122448979592,
+ "grad_norm": 5.182572364807129,
+ "learning_rate": 2.2912885662431943e-05,
+ "loss": 0.6461,
+ "step": 720
+ },
+ {
+ "epoch": 2.1942857142857144,
+ "grad_norm": 10.561123847961426,
+ "learning_rate": 2.2459165154264975e-05,
+ "loss": 0.5704,
+ "step": 730
+ },
+ {
+ "epoch": 2.2008163265306124,
+ "eval_accuracy": 0.6871345029239766,
+ "eval_loss": 1.2350349426269531,
+ "eval_runtime": 759.3483,
+ "eval_samples_per_second": 2.252,
+ "eval_steps_per_second": 0.095,
+ "step": 738
+ },
+ {
+ "epoch": 3.0016326530612245,
+ "grad_norm": 9.009034156799316,
+ "learning_rate": 2.2005444646098004e-05,
+ "loss": 0.7604,
+ "step": 740
+ },
+ {
+ "epoch": 3.009795918367347,
+ "grad_norm": 8.081964492797852,
+ "learning_rate": 2.1551724137931033e-05,
+ "loss": 0.5601,
+ "step": 750
+ },
+ {
+ "epoch": 3.0179591836734696,
+ "grad_norm": 5.625163555145264,
+ "learning_rate": 2.1098003629764065e-05,
+ "loss": 0.5588,
+ "step": 760
+ },
+ {
+ "epoch": 3.0261224489795917,
+ "grad_norm": 18.619417190551758,
+ "learning_rate": 2.0644283121597097e-05,
+ "loss": 0.5016,
+ "step": 770
+ },
+ {
+ "epoch": 3.0342857142857143,
+ "grad_norm": 4.800343036651611,
+ "learning_rate": 2.019056261343013e-05,
+ "loss": 0.5072,
+ "step": 780
+ },
+ {
+ "epoch": 3.042448979591837,
+ "grad_norm": 7.775487422943115,
+ "learning_rate": 1.9736842105263158e-05,
+ "loss": 0.6298,
+ "step": 790
+ },
+ {
+ "epoch": 3.0506122448979593,
+ "grad_norm": 7.29972505569458,
+ "learning_rate": 1.928312159709619e-05,
+ "loss": 0.564,
+ "step": 800
+ },
+ {
+ "epoch": 3.0587755102040814,
+ "grad_norm": 4.456672668457031,
+ "learning_rate": 1.8829401088929222e-05,
+ "loss": 0.5806,
+ "step": 810
+ },
+ {
+ "epoch": 3.066938775510204,
+ "grad_norm": 8.3887300491333,
+ "learning_rate": 1.837568058076225e-05,
+ "loss": 0.4955,
+ "step": 820
+ },
+ {
+ "epoch": 3.0751020408163265,
+ "grad_norm": 7.33532190322876,
+ "learning_rate": 1.792196007259528e-05,
+ "loss": 0.5767,
+ "step": 830
+ },
+ {
+ "epoch": 3.083265306122449,
+ "grad_norm": 15.156110763549805,
+ "learning_rate": 1.7468239564428312e-05,
+ "loss": 0.4801,
+ "step": 840
+ },
+ {
+ "epoch": 3.0914285714285716,
+ "grad_norm": 7.68253231048584,
+ "learning_rate": 1.7014519056261344e-05,
+ "loss": 0.5923,
+ "step": 850
+ },
+ {
+ "epoch": 3.0995918367346937,
+ "grad_norm": 11.319337844848633,
+ "learning_rate": 1.6560798548094377e-05,
+ "loss": 0.4914,
+ "step": 860
+ },
+ {
+ "epoch": 3.1077551020408163,
+ "grad_norm": 13.500849723815918,
+ "learning_rate": 1.6107078039927405e-05,
+ "loss": 0.5273,
+ "step": 870
+ },
+ {
+ "epoch": 3.115918367346939,
+ "grad_norm": 8.655684471130371,
+ "learning_rate": 1.5653357531760438e-05,
+ "loss": 0.5035,
+ "step": 880
+ },
+ {
+ "epoch": 3.1240816326530614,
+ "grad_norm": 7.286762237548828,
+ "learning_rate": 1.5199637023593466e-05,
+ "loss": 0.5134,
+ "step": 890
+ },
+ {
+ "epoch": 3.1322448979591835,
+ "grad_norm": 4.1852641105651855,
+ "learning_rate": 1.4745916515426497e-05,
+ "loss": 0.4418,
+ "step": 900
+ },
+ {
+ "epoch": 3.140408163265306,
+ "grad_norm": 15.295413970947266,
+ "learning_rate": 1.4292196007259529e-05,
+ "loss": 0.563,
+ "step": 910
+ },
+ {
+ "epoch": 3.1485714285714286,
+ "grad_norm": 14.049315452575684,
+ "learning_rate": 1.383847549909256e-05,
+ "loss": 0.5778,
+ "step": 920
+ },
+ {
+ "epoch": 3.156734693877551,
+ "grad_norm": 5.061805725097656,
+ "learning_rate": 1.3384754990925592e-05,
+ "loss": 0.5193,
+ "step": 930
+ },
+ {
+ "epoch": 3.1648979591836737,
+ "grad_norm": 3.378352403640747,
+ "learning_rate": 1.2931034482758622e-05,
+ "loss": 0.4423,
+ "step": 940
+ },
+ {
+ "epoch": 3.1730612244897958,
+ "grad_norm": 12.00133991241455,
+ "learning_rate": 1.2477313974591653e-05,
+ "loss": 0.3907,
+ "step": 950
+ },
+ {
+ "epoch": 3.1812244897959183,
+ "grad_norm": 5.577662944793701,
+ "learning_rate": 1.2023593466424683e-05,
+ "loss": 0.5331,
+ "step": 960
+ },
+ {
+ "epoch": 3.189387755102041,
+ "grad_norm": 10.945758819580078,
+ "learning_rate": 1.1569872958257714e-05,
+ "loss": 0.5347,
+ "step": 970
+ },
+ {
+ "epoch": 3.1975510204081634,
+ "grad_norm": 8.133883476257324,
+ "learning_rate": 1.1116152450090744e-05,
+ "loss": 0.5847,
+ "step": 980
+ },
+ {
+ "epoch": 3.2008163265306124,
+ "eval_accuracy": 0.7397660818713451,
+ "eval_loss": 1.1260349750518799,
+ "eval_runtime": 756.6531,
+ "eval_samples_per_second": 2.26,
+ "eval_steps_per_second": 0.095,
+ "step": 984
+ },
+ {
+ "epoch": 4.0048979591836735,
+ "grad_norm": 10.616361618041992,
+ "learning_rate": 1.0662431941923776e-05,
+ "loss": 0.511,
+ "step": 990
+ },
+ {
+ "epoch": 4.013061224489796,
+ "grad_norm": 17.12271499633789,
+ "learning_rate": 1.0208711433756807e-05,
+ "loss": 0.5201,
+ "step": 1000
+ },
+ {
+ "epoch": 4.021224489795919,
+ "grad_norm": 7.298173427581787,
+ "learning_rate": 9.754990925589837e-06,
+ "loss": 0.4441,
+ "step": 1010
+ },
+ {
+ "epoch": 4.029387755102041,
+ "grad_norm": 11.826898574829102,
+ "learning_rate": 9.301270417422868e-06,
+ "loss": 0.3371,
+ "step": 1020
+ },
+ {
+ "epoch": 4.037551020408163,
+ "grad_norm": 8.964632987976074,
+ "learning_rate": 8.8475499092559e-06,
+ "loss": 0.3911,
+ "step": 1030
+ },
+ {
+ "epoch": 4.045714285714285,
+ "grad_norm": 6.960618495941162,
+ "learning_rate": 8.393829401088929e-06,
+ "loss": 0.4594,
+ "step": 1040
+ },
+ {
+ "epoch": 4.053877551020408,
+ "grad_norm": 4.924427509307861,
+ "learning_rate": 7.94010889292196e-06,
+ "loss": 0.5469,
+ "step": 1050
+ },
+ {
+ "epoch": 4.0620408163265305,
+ "grad_norm": 4.707489967346191,
+ "learning_rate": 7.486388384754991e-06,
+ "loss": 0.3609,
+ "step": 1060
+ },
+ {
+ "epoch": 4.070204081632653,
+ "grad_norm": 10.195731163024902,
+ "learning_rate": 7.0326678765880225e-06,
+ "loss": 0.4136,
+ "step": 1070
+ },
+ {
+ "epoch": 4.078367346938776,
+ "grad_norm": 7.881709098815918,
+ "learning_rate": 6.578947368421053e-06,
+ "loss": 0.3424,
+ "step": 1080
+ },
+ {
+ "epoch": 4.086530612244898,
+ "grad_norm": 12.66398811340332,
+ "learning_rate": 6.1252268602540835e-06,
+ "loss": 0.3489,
+ "step": 1090
+ },
+ {
+ "epoch": 4.094693877551021,
+ "grad_norm": 8.235493659973145,
+ "learning_rate": 5.671506352087114e-06,
+ "loss": 0.3887,
+ "step": 1100
+ },
+ {
+ "epoch": 4.102857142857143,
+ "grad_norm": 10.095196723937988,
+ "learning_rate": 5.217785843920145e-06,
+ "loss": 0.4551,
+ "step": 1110
+ },
+ {
+ "epoch": 4.111020408163265,
+ "grad_norm": 5.204263687133789,
+ "learning_rate": 4.764065335753176e-06,
+ "loss": 0.3242,
+ "step": 1120
+ },
+ {
+ "epoch": 4.119183673469387,
+ "grad_norm": 6.840783596038818,
+ "learning_rate": 4.310344827586207e-06,
+ "loss": 0.4693,
+ "step": 1130
+ },
+ {
+ "epoch": 4.12734693877551,
+ "grad_norm": 9.000433921813965,
+ "learning_rate": 3.8566243194192376e-06,
+ "loss": 0.4683,
+ "step": 1140
+ },
+ {
+ "epoch": 4.1355102040816325,
+ "grad_norm": 10.167008399963379,
+ "learning_rate": 3.4029038112522685e-06,
+ "loss": 0.4362,
+ "step": 1150
+ },
+ {
+ "epoch": 4.143673469387755,
+ "grad_norm": 4.929052352905273,
+ "learning_rate": 2.9491833030852998e-06,
+ "loss": 0.3308,
+ "step": 1160
+ },
+ {
+ "epoch": 4.151836734693878,
+ "grad_norm": 9.885331153869629,
+ "learning_rate": 2.4954627949183303e-06,
+ "loss": 0.3296,
+ "step": 1170
+ },
+ {
+ "epoch": 4.16,
+ "grad_norm": 8.994477272033691,
+ "learning_rate": 2.041742286751361e-06,
+ "loss": 0.3484,
+ "step": 1180
+ },
+ {
+ "epoch": 4.168163265306123,
+ "grad_norm": 10.2811918258667,
+ "learning_rate": 1.588021778584392e-06,
+ "loss": 0.4507,
+ "step": 1190
+ },
+ {
+ "epoch": 4.176326530612245,
+ "grad_norm": 4.625392913818359,
+ "learning_rate": 1.134301270417423e-06,
+ "loss": 0.3656,
+ "step": 1200
+ },
+ {
+ "epoch": 4.184489795918367,
+ "grad_norm": 4.338553428649902,
+ "learning_rate": 6.805807622504538e-07,
+ "loss": 0.4143,
+ "step": 1210
+ },
+ {
+ "epoch": 4.192653061224489,
+ "grad_norm": 7.271970748901367,
+ "learning_rate": 2.268602540834846e-07,
+ "loss": 0.4343,
+ "step": 1220
+ },
+ {
+ "epoch": 4.196734693877551,
+ "eval_accuracy": 0.7619883040935672,
+ "eval_loss": 1.0475685596466064,
+ "eval_runtime": 742.0109,
+ "eval_samples_per_second": 2.305,
+ "eval_steps_per_second": 0.097,
+ "step": 1225
+ },
+ {
+ "epoch": 4.196734693877551,
+ "step": 1225,
+ "total_flos": 3.6552500125909254e+19,
+ "train_loss": 0.8672810672253979,
+ "train_runtime": 16546.435,
+ "train_samples_per_second": 1.777,
+ "train_steps_per_second": 0.074
+ },
+ {
+ "epoch": 4.196734693877551,
+ "eval_accuracy": 0.8441558441558441,
+ "eval_loss": 0.5236970782279968,
+ "eval_runtime": 645.1565,
+ "eval_samples_per_second": 2.268,
+ "eval_steps_per_second": 0.095,
+ "step": 1225
+ },
+ {
+ "epoch": 4.196734693877551,
+ "eval_accuracy": 0.8441558441558441,
+ "eval_loss": 0.5236971378326416,
+ "eval_runtime": 632.6262,
+ "eval_samples_per_second": 2.313,
+ "eval_steps_per_second": 0.096,
+ "step": 1225
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 1225,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 9223372036854775807,
+ "save_steps": 500,
+ "stateful_callbacks": {
+ "TrainerControl": {
+ "args": {
+ "should_epoch_stop": false,
+ "should_evaluate": false,
+ "should_log": false,
+ "should_save": true,
+ "should_training_stop": true
+ },
+ "attributes": {}
+ }
+ },
+ "total_flos": 3.6552500125909254e+19,
+ "train_batch_size": 24,
+ "trial_name": null,
+ "trial_params": null
+ }
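The trainer_state.json added above follows the standard Hugging Face Trainer layout: a log_history list that mixes training-loss records with evaluation records. As a minimal sketch (assuming a local copy of the file downloaded from this repo), the evaluation curve and the best checkpoint can be read back with nothing but the standard library:

import json

# Minimal sketch: parse the trainer_state.json from this commit (assumes a local copy).
with open("trainer_state.json") as f:
    state = json.load(f)

print("best checkpoint:", state["best_model_checkpoint"])
print("best metric:", state["best_metric"])

# Evaluation records are the log_history entries that carry "eval_accuracy".
for record in state["log_history"]:
    if "eval_accuracy" in record:
        print(f'step {record["step"]:>4}  epoch {record["epoch"]:.2f}  '
              f'eval_accuracy {record["eval_accuracy"]:.4f}  eval_loss {record["eval_loss"]:.4f}')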