youdiniplays committed on
Commit
f8125cb
·
verified ·
1 Parent(s): ea040b9

Training in progress, step 20500

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ad617c3b1499306b38fa543b46003af6ade42388383994c0c5d60da4a2ad7c4
3
  size 242041896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c43564e930260fa3abbd24ef8bc8e2019043f8ab0ea3f357c88632def606095f
3
  size 242041896
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:facd69ad7e7e0b449379616f05089d4168b12087b28ef0b17bf7a61dd5f45fd5
3
  size 484163514
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5157a79613ec248ac8f7d1c8901dc35a0467ddf892a8d4eace27612fd7b9473d
3
  size 484163514
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3425462108d6fd7d6abec9df52dee17dc63dfbe7caf986228c6ea596204901b9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af796267785b8380aa702da74322c3a84f42d9cab004eb98c1af133c0218d64a
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4583ca186954ad348c3105fd02b08775e0c9a44b9f82b90dc8c09ccb4b5dd21c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b2103b3ba42c5a974b79f170c6d33390323651972016338595643ea512a6c2b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0693677102516883,
5
  "eval_steps": 500,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -11,279 +11,439 @@
11
  {
12
  "epoch": 0.08,
13
  "learning_rate": 0.0009846531614487416,
14
- "loss": 0.3358,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.15,
19
  "learning_rate": 0.0009693063228974831,
20
- "loss": 0.3723,
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 0.23,
25
  "learning_rate": 0.0009539594843462247,
26
- "loss": 0.3901,
27
  "step": 1500
28
  },
29
  {
30
  "epoch": 0.31,
31
  "learning_rate": 0.0009386126457949663,
32
- "loss": 0.4022,
33
  "step": 2000
34
  },
35
  {
36
  "epoch": 0.38,
37
  "learning_rate": 0.0009232658072437078,
38
- "loss": 0.4109,
39
  "step": 2500
40
  },
41
  {
42
  "epoch": 0.46,
43
  "learning_rate": 0.0009079189686924494,
44
- "loss": 0.4132,
45
  "step": 3000
46
  },
47
  {
48
  "epoch": 0.54,
49
  "learning_rate": 0.000892572130141191,
50
- "loss": 0.4197,
51
  "step": 3500
52
  },
53
  {
54
  "epoch": 0.61,
55
  "learning_rate": 0.0008772252915899325,
56
- "loss": 0.4227,
57
  "step": 4000
58
  },
59
  {
60
  "epoch": 0.69,
61
- "learning_rate": 0.0008618784530386741,
62
- "loss": 0.424,
63
  "step": 4500
64
  },
65
  {
66
  "epoch": 0.77,
67
- "learning_rate": 0.0008465316144874156,
68
- "loss": 0.4228,
69
  "step": 5000
70
  },
71
  {
72
  "epoch": 0.84,
73
- "learning_rate": 0.0008311847759361571,
74
- "loss": 0.4246,
75
  "step": 5500
76
  },
77
  {
78
  "epoch": 0.92,
79
- "learning_rate": 0.0008158379373848988,
80
- "loss": 0.4268,
81
  "step": 6000
82
  },
83
  {
84
  "epoch": 1.0,
85
- "learning_rate": 0.0008005217925107428,
86
- "loss": 0.4316,
87
  "step": 6500
88
  },
89
  {
90
  "epoch": 1.0,
91
- "eval_bleu": 4.2677,
92
- "eval_gen_len": 18.024,
93
- "eval_loss": 0.45959702134132385,
94
- "eval_runtime": 23.3818,
95
- "eval_samples_per_second": 42.768,
96
- "eval_steps_per_second": 2.694,
97
  "step": 6516
98
  },
99
  {
100
  "epoch": 1.07,
101
- "learning_rate": 0.0007851749539594844,
102
- "loss": 0.3897,
103
  "step": 7000
104
  },
105
  {
106
  "epoch": 1.15,
107
- "learning_rate": 0.0007698281154082259,
108
- "loss": 0.3901,
109
  "step": 7500
110
  },
111
  {
112
  "epoch": 1.23,
113
- "learning_rate": 0.0007544812768569675,
114
- "loss": 0.3965,
115
  "step": 8000
116
  },
117
  {
118
  "epoch": 1.3,
119
- "learning_rate": 0.0007391651319828116,
120
- "loss": 0.394,
121
  "step": 8500
122
  },
123
  {
124
  "epoch": 1.38,
125
- "learning_rate": 0.0007238182934315531,
126
- "loss": 0.4003,
127
  "step": 9000
128
  },
129
  {
130
  "epoch": 1.46,
131
- "learning_rate": 0.0007084714548802946,
132
- "loss": 0.403,
133
  "step": 9500
134
  },
135
  {
136
  "epoch": 1.53,
137
- "learning_rate": 0.0006931246163290362,
138
- "loss": 0.4067,
139
  "step": 10000
140
  },
141
  {
142
  "epoch": 1.61,
143
- "learning_rate": 0.0006778084714548804,
144
- "loss": 0.4067,
145
  "step": 10500
146
  },
147
  {
148
  "epoch": 1.69,
149
- "learning_rate": 0.0006624616329036218,
150
- "loss": 0.4047,
151
  "step": 11000
152
  },
153
  {
154
  "epoch": 1.76,
155
- "learning_rate": 0.0006471147943523635,
156
- "loss": 0.4032,
157
  "step": 11500
158
  },
159
  {
160
  "epoch": 1.84,
161
- "learning_rate": 0.000631767955801105,
162
- "loss": 0.4077,
163
  "step": 12000
164
  },
165
  {
166
  "epoch": 1.92,
167
- "learning_rate": 0.0006164825046040515,
168
- "loss": 0.4062,
169
  "step": 12500
170
  },
171
  {
172
  "epoch": 2.0,
173
- "learning_rate": 0.0006011356660527931,
174
- "loss": 0.4074,
175
  "step": 13000
176
  },
177
  {
178
  "epoch": 2.0,
179
- "eval_bleu": 4.2659,
180
- "eval_gen_len": 18.019,
181
- "eval_loss": 0.45938295125961304,
182
- "eval_runtime": 24.1279,
183
- "eval_samples_per_second": 41.446,
184
- "eval_steps_per_second": 2.611,
185
  "step": 13032
186
  },
187
  {
188
  "epoch": 2.07,
189
  "learning_rate": 0.0005857888275015347,
190
- "loss": 0.3667,
191
  "step": 13500
192
  },
193
  {
194
  "epoch": 2.15,
195
  "learning_rate": 0.0005704419889502763,
196
- "loss": 0.3674,
197
  "step": 14000
198
  },
199
  {
200
  "epoch": 2.23,
201
  "learning_rate": 0.0005550951503990178,
202
- "loss": 0.3749,
203
  "step": 14500
204
  },
205
  {
206
  "epoch": 2.3,
207
  "learning_rate": 0.0005397483118477594,
208
- "loss": 0.3707,
209
  "step": 15000
210
  },
211
  {
212
  "epoch": 2.38,
213
  "learning_rate": 0.0005244014732965009,
214
- "loss": 0.3676,
215
  "step": 15500
216
  },
217
  {
218
  "epoch": 2.46,
219
- "learning_rate": 0.0005090546347452425,
220
- "loss": 0.3761,
221
  "step": 16000
222
  },
223
  {
224
  "epoch": 2.53,
225
  "learning_rate": 0.0004937384898710866,
226
- "loss": 0.3795,
227
  "step": 16500
228
  },
229
  {
230
  "epoch": 2.61,
231
  "learning_rate": 0.00047839165131982814,
232
- "loss": 0.377,
233
  "step": 17000
234
  },
235
  {
236
  "epoch": 2.69,
237
  "learning_rate": 0.0004630448127685697,
238
- "loss": 0.3804,
239
  "step": 17500
240
  },
241
  {
242
  "epoch": 2.76,
243
- "learning_rate": 0.00044769797421731125,
244
- "loss": 0.3824,
245
  "step": 18000
246
  },
247
  {
248
  "epoch": 2.84,
249
  "learning_rate": 0.00043238182934315537,
250
- "loss": 0.3724,
251
  "step": 18500
252
  },
253
  {
254
  "epoch": 2.92,
255
  "learning_rate": 0.00041703499079189687,
256
- "loss": 0.3795,
257
  "step": 19000
258
  },
259
  {
260
  "epoch": 2.99,
261
  "learning_rate": 0.0004016881522406384,
262
- "loss": 0.3833,
263
  "step": 19500
264
  },
265
  {
266
  "epoch": 3.0,
267
- "eval_bleu": 4.3571,
268
- "eval_gen_len": 18.015,
269
- "eval_loss": 0.4461449086666107,
270
- "eval_runtime": 23.8381,
271
- "eval_samples_per_second": 41.95,
272
- "eval_steps_per_second": 2.643,
273
  "step": 19548
274
  },
275
  {
276
  "epoch": 3.07,
277
- "learning_rate": 0.00038634131368938,
278
- "loss": 0.3423,
279
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
280
  }
281
  ],
282
  "logging_steps": 500,
283
  "max_steps": 32580,
284
  "num_train_epochs": 5,
285
  "save_steps": 500,
286
- "total_flos": 1.0611816528347136e+16,
287
  "trial_name": null,
288
  "trial_params": null
289
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.987722529158994,
5
  "eval_steps": 500,
6
+ "global_step": 32500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
11
  {
12
  "epoch": 0.08,
13
  "learning_rate": 0.0009846531614487416,
14
+ "loss": 0.5762,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.15,
19
  "learning_rate": 0.0009693063228974831,
20
+ "loss": 0.6042,
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 0.23,
25
  "learning_rate": 0.0009539594843462247,
26
+ "loss": 0.6024,
27
  "step": 1500
28
  },
29
  {
30
  "epoch": 0.31,
31
  "learning_rate": 0.0009386126457949663,
32
+ "loss": 0.6057,
33
  "step": 2000
34
  },
35
  {
36
  "epoch": 0.38,
37
  "learning_rate": 0.0009232658072437078,
38
+ "loss": 0.6143,
39
  "step": 2500
40
  },
41
  {
42
  "epoch": 0.46,
43
  "learning_rate": 0.0009079189686924494,
44
+ "loss": 0.6081,
45
  "step": 3000
46
  },
47
  {
48
  "epoch": 0.54,
49
  "learning_rate": 0.000892572130141191,
50
+ "loss": 0.611,
51
  "step": 3500
52
  },
53
  {
54
  "epoch": 0.61,
55
  "learning_rate": 0.0008772252915899325,
56
+ "loss": 0.6104,
57
  "step": 4000
58
  },
59
  {
60
  "epoch": 0.69,
61
+ "learning_rate": 0.0008619091467157766,
62
+ "loss": 0.6154,
63
  "step": 4500
64
  },
65
  {
66
  "epoch": 0.77,
67
+ "learning_rate": 0.0008465623081645181,
68
+ "loss": 0.6151,
69
  "step": 5000
70
  },
71
  {
72
  "epoch": 0.84,
73
+ "learning_rate": 0.0008312154696132597,
74
+ "loss": 0.611,
75
  "step": 5500
76
  },
77
  {
78
  "epoch": 0.92,
79
+ "learning_rate": 0.0008158686310620012,
80
+ "loss": 0.617,
81
  "step": 6000
82
  },
83
  {
84
  "epoch": 1.0,
85
+ "learning_rate": 0.0008005524861878454,
86
+ "loss": 0.6089,
87
  "step": 6500
88
  },
89
  {
90
  "epoch": 1.0,
91
+ "eval_bleu": 3.6822,
92
+ "eval_gen_len": 18.235,
93
+ "eval_loss": 0.506125271320343,
94
+ "eval_runtime": 27.1471,
95
+ "eval_samples_per_second": 36.836,
96
+ "eval_steps_per_second": 2.321,
97
  "step": 6516
98
  },
99
  {
100
  "epoch": 1.07,
101
+ "learning_rate": 0.0007852056476365869,
102
+ "loss": 0.5657,
103
  "step": 7000
104
  },
105
  {
106
  "epoch": 1.15,
107
+ "learning_rate": 0.0007698588090853285,
108
+ "loss": 0.5623,
109
  "step": 7500
110
  },
111
  {
112
  "epoch": 1.23,
113
+ "learning_rate": 0.0007545119705340699,
114
+ "loss": 0.5669,
115
  "step": 8000
116
  },
117
  {
118
  "epoch": 1.3,
119
+ "learning_rate": 0.000739195825659914,
120
+ "loss": 0.5793,
121
  "step": 8500
122
  },
123
  {
124
  "epoch": 1.38,
125
+ "learning_rate": 0.0007238489871086557,
126
+ "loss": 0.5743,
127
  "step": 9000
128
  },
129
  {
130
  "epoch": 1.46,
131
+ "learning_rate": 0.0007085328422344997,
132
+ "loss": 0.5722,
133
  "step": 9500
134
  },
135
  {
136
  "epoch": 1.53,
137
+ "learning_rate": 0.0006931860036832413,
138
+ "loss": 0.5749,
139
  "step": 10000
140
  },
141
  {
142
  "epoch": 1.61,
143
+ "learning_rate": 0.0006778391651319828,
144
+ "loss": 0.5776,
145
  "step": 10500
146
  },
147
  {
148
  "epoch": 1.69,
149
+ "learning_rate": 0.0006624923265807244,
150
+ "loss": 0.5694,
151
  "step": 11000
152
  },
153
  {
154
  "epoch": 1.76,
155
+ "learning_rate": 0.0006471454880294659,
156
+ "loss": 0.5716,
157
  "step": 11500
158
  },
159
  {
160
  "epoch": 1.84,
161
+ "learning_rate": 0.0006317986494782076,
162
+ "loss": 0.5665,
163
  "step": 12000
164
  },
165
  {
166
  "epoch": 1.92,
167
+ "learning_rate": 0.000616451810926949,
168
+ "loss": 0.5671,
169
  "step": 12500
170
  },
171
  {
172
  "epoch": 2.0,
173
+ "learning_rate": 0.0006011049723756906,
174
+ "loss": 0.5687,
175
  "step": 13000
176
  },
177
  {
178
  "epoch": 2.0,
179
+ "eval_bleu": 3.7119,
180
+ "eval_gen_len": 18.222,
181
+ "eval_loss": 0.495604932308197,
182
+ "eval_runtime": 23.8741,
183
+ "eval_samples_per_second": 41.886,
184
+ "eval_steps_per_second": 2.639,
185
  "step": 13032
186
  },
187
  {
188
  "epoch": 2.07,
189
  "learning_rate": 0.0005857888275015347,
190
+ "loss": 0.5307,
191
  "step": 13500
192
  },
193
  {
194
  "epoch": 2.15,
195
  "learning_rate": 0.0005704419889502763,
196
+ "loss": 0.5325,
197
  "step": 14000
198
  },
199
  {
200
  "epoch": 2.23,
201
  "learning_rate": 0.0005550951503990178,
202
+ "loss": 0.5353,
203
  "step": 14500
204
  },
205
  {
206
  "epoch": 2.3,
207
  "learning_rate": 0.0005397483118477594,
208
+ "loss": 0.5289,
209
  "step": 15000
210
  },
211
  {
212
  "epoch": 2.38,
213
  "learning_rate": 0.0005244014732965009,
214
+ "loss": 0.5321,
215
  "step": 15500
216
  },
217
  {
218
  "epoch": 2.46,
219
+ "learning_rate": 0.000509085328422345,
220
+ "loss": 0.5271,
221
  "step": 16000
222
  },
223
  {
224
  "epoch": 2.53,
225
  "learning_rate": 0.0004937384898710866,
226
+ "loss": 0.5295,
227
  "step": 16500
228
  },
229
  {
230
  "epoch": 2.61,
231
  "learning_rate": 0.00047839165131982814,
232
+ "loss": 0.524,
233
  "step": 17000
234
  },
235
  {
236
  "epoch": 2.69,
237
  "learning_rate": 0.0004630448127685697,
238
+ "loss": 0.526,
239
  "step": 17500
240
  },
241
  {
242
  "epoch": 2.76,
243
+ "learning_rate": 0.00044772866789441376,
244
+ "loss": 0.5323,
245
  "step": 18000
246
  },
247
  {
248
  "epoch": 2.84,
249
  "learning_rate": 0.00043238182934315537,
250
+ "loss": 0.5236,
251
  "step": 18500
252
  },
253
  {
254
  "epoch": 2.92,
255
  "learning_rate": 0.00041703499079189687,
256
+ "loss": 0.5266,
257
  "step": 19000
258
  },
259
  {
260
  "epoch": 2.99,
261
  "learning_rate": 0.0004016881522406384,
262
+ "loss": 0.525,
263
  "step": 19500
264
  },
265
  {
266
  "epoch": 3.0,
267
+ "eval_bleu": 3.8722,
268
+ "eval_gen_len": 18.219,
269
+ "eval_loss": 0.4868793785572052,
270
+ "eval_runtime": 23.2067,
271
+ "eval_samples_per_second": 43.091,
272
+ "eval_steps_per_second": 2.715,
273
  "step": 19548
274
  },
275
  {
276
  "epoch": 3.07,
277
+ "learning_rate": 0.00038637200736648254,
278
+ "loss": 0.4934,
279
  "step": 20000
280
+ },
281
+ {
282
+ "epoch": 3.15,
283
+ "learning_rate": 0.0003710251688152241,
284
+ "loss": 0.4869,
285
+ "step": 20500
286
+ },
287
+ {
288
+ "epoch": 3.22,
289
+ "learning_rate": 0.0003556783302639656,
290
+ "loss": 0.4917,
291
+ "step": 21000
292
+ },
293
+ {
294
+ "epoch": 3.3,
295
+ "learning_rate": 0.00034033149171270715,
296
+ "loss": 0.4841,
297
+ "step": 21500
298
+ },
299
+ {
300
+ "epoch": 3.38,
301
+ "learning_rate": 0.00032501534683855126,
302
+ "loss": 0.4906,
303
+ "step": 22000
304
+ },
305
+ {
306
+ "epoch": 3.45,
307
+ "learning_rate": 0.0003096685082872928,
308
+ "loss": 0.4884,
309
+ "step": 22500
310
+ },
311
+ {
312
+ "epoch": 3.53,
313
+ "learning_rate": 0.0002943216697360344,
314
+ "loss": 0.486,
315
+ "step": 23000
316
+ },
317
+ {
318
+ "epoch": 3.61,
319
+ "learning_rate": 0.00027897483118477593,
320
+ "loss": 0.4932,
321
+ "step": 23500
322
+ },
323
+ {
324
+ "epoch": 3.68,
325
+ "learning_rate": 0.00026365868631062,
326
+ "loss": 0.4951,
327
+ "step": 24000
328
+ },
329
+ {
330
+ "epoch": 3.76,
331
+ "learning_rate": 0.0002483118477593616,
332
+ "loss": 0.4896,
333
+ "step": 24500
334
+ },
335
+ {
336
+ "epoch": 3.84,
337
+ "learning_rate": 0.00023296500920810313,
338
+ "loss": 0.4832,
339
+ "step": 25000
340
+ },
341
+ {
342
+ "epoch": 3.91,
343
+ "learning_rate": 0.00021761817065684468,
344
+ "loss": 0.4874,
345
+ "step": 25500
346
+ },
347
+ {
348
+ "epoch": 3.99,
349
+ "learning_rate": 0.00020230202578268877,
350
+ "loss": 0.488,
351
+ "step": 26000
352
+ },
353
+ {
354
+ "epoch": 4.0,
355
+ "eval_bleu": 3.7886,
356
+ "eval_gen_len": 18.23,
357
+ "eval_loss": 0.4795362949371338,
358
+ "eval_runtime": 23.2422,
359
+ "eval_samples_per_second": 43.025,
360
+ "eval_steps_per_second": 2.711,
361
+ "step": 26064
362
+ },
363
+ {
364
+ "epoch": 4.07,
365
+ "learning_rate": 0.00018695518723143033,
366
+ "loss": 0.4583,
367
+ "step": 26500
368
+ },
369
+ {
370
+ "epoch": 4.14,
371
+ "learning_rate": 0.00017160834868017188,
372
+ "loss": 0.4495,
373
+ "step": 27000
374
+ },
375
+ {
376
+ "epoch": 4.22,
377
+ "learning_rate": 0.00015626151012891346,
378
+ "loss": 0.454,
379
+ "step": 27500
380
+ },
381
+ {
382
+ "epoch": 4.3,
383
+ "learning_rate": 0.00014094536525475752,
384
+ "loss": 0.4547,
385
+ "step": 28000
386
+ },
387
+ {
388
+ "epoch": 4.37,
389
+ "learning_rate": 0.00012559852670349908,
390
+ "loss": 0.4588,
391
+ "step": 28500
392
+ },
393
+ {
394
+ "epoch": 4.45,
395
+ "learning_rate": 0.00011025168815224063,
396
+ "loss": 0.453,
397
+ "step": 29000
398
+ },
399
+ {
400
+ "epoch": 4.53,
401
+ "learning_rate": 9.49048496009822e-05,
402
+ "loss": 0.4549,
403
+ "step": 29500
404
+ },
405
+ {
406
+ "epoch": 4.6,
407
+ "learning_rate": 7.955801104972376e-05,
408
+ "loss": 0.4533,
409
+ "step": 30000
410
+ },
411
+ {
412
+ "epoch": 4.68,
413
+ "learning_rate": 6.424186617556783e-05,
414
+ "loss": 0.4495,
415
+ "step": 30500
416
+ },
417
+ {
418
+ "epoch": 4.76,
419
+ "learning_rate": 4.8895027624309394e-05,
420
+ "loss": 0.4555,
421
+ "step": 31000
422
+ },
423
+ {
424
+ "epoch": 4.83,
425
+ "learning_rate": 3.3548189073050956e-05,
426
+ "loss": 0.4563,
427
+ "step": 31500
428
+ },
429
+ {
430
+ "epoch": 4.91,
431
+ "learning_rate": 1.820135052179251e-05,
432
+ "loss": 0.4563,
433
+ "step": 32000
434
+ },
435
+ {
436
+ "epoch": 4.99,
437
+ "learning_rate": 2.885205647636587e-06,
438
+ "loss": 0.4527,
439
+ "step": 32500
440
  }
441
  ],
442
  "logging_steps": 500,
443
  "max_steps": 32580,
444
  "num_train_epochs": 5,
445
  "save_steps": 500,
446
+ "total_flos": 1.7233985138589696e+16,
447
  "trial_name": null,
448
  "trial_params": null
449
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d9e5fd144c9e568216db4505e71aa6bc722764749fdf639a5e84c071e512165
3
  size 4792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbbc9f6fd02ff4264ade394c37847b822a605f5a9ef106ad63ba6639ed6bf7c2
3
  size 4792
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ad617c3b1499306b38fa543b46003af6ade42388383994c0c5d60da4a2ad7c4
3
  size 242041896
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f41bb533aa86e301a1be3b65f96c1170612ae823deb447e90a35a957837ed0d
3
  size 242041896
runs/Jan21_09-30-28_854346de9726/events.out.tfevents.1705829428.854346de9726.164.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6a32975b5993ef40577b9ca1b2fc971641bad472d23ef0d4949d476ea8cbf8b8
3
- size 12689
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78e7c058478c3d5d58805cbb2ff89ae6ffe9f56ad419227b33a3cfe9558d088e
3
+ size 12849