ma2za committed on
Commit
a1d4ae3
·
1 Parent(s): c002dea

Training in progress, step 400

Browse files
config.json CHANGED
@@ -39,7 +39,7 @@
39
  "pad_token_id": 1,
40
  "position_embedding_type": "absolute",
41
  "torch_dtype": "float32",
42
- "transformers_version": "4.30.1",
43
  "type_vocab_size": 1,
44
  "use_cache": true,
45
  "vocab_size": 250002
 
39
  "pad_token_id": 1,
40
  "position_embedding_type": "absolute",
41
  "torch_dtype": "float32",
42
+ "transformers_version": "4.30.2",
43
  "type_vocab_size": 1,
44
  "use_cache": true,
45
  "vocab_size": 250002
last-checkpoint/config.json CHANGED
@@ -39,7 +39,7 @@
39
  "pad_token_id": 1,
40
  "position_embedding_type": "absolute",
41
  "torch_dtype": "float32",
42
- "transformers_version": "4.30.1",
43
  "type_vocab_size": 1,
44
  "use_cache": true,
45
  "vocab_size": 250002
 
39
  "pad_token_id": 1,
40
  "position_embedding_type": "absolute",
41
  "torch_dtype": "float32",
42
+ "transformers_version": "4.30.2",
43
  "type_vocab_size": 1,
44
  "use_cache": true,
45
  "vocab_size": 250002
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:90ac490256f647949f42695d32b73cba0a101fb95f10d5a619cb5660ba10ff2d
3
- size 2067085189
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d616098abea3fdeeea247fed78a11358c8181177518c9e2cc80e894db289c6c
3
+ size 2117841669
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47de52e58786b8b98181cbc186cb69933573acd6041e65760d03400d3c901a9f
3
- size 2242911029
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88eafde7549bbcb49017355eaeb21a46d8f90e1af6a6b1611cf95b3efd4e3a31
3
+ size 2268278205
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d784389a18bf7e45927e65e8221eaa92ccc2c44f81f42e83165f180c191f35dc
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fc93f677e3ae1c54a0f660d829958d1fc6a45e644c99a73005a32d0415718d5
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:876e9140cb29ecc9a1f1514854b3ff5776e33c28372ab87b8f728fd017b65ba0
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bce84bf308f24fcda8499bb3d8796270be528f26412bc83d531ac8dd12f16446
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,312 +1,26 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.881427072402938,
5
- "global_step": 8400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.04,
12
- "eval_accuracy": 0.6216116513450103,
13
- "eval_f1": 0.5350163270245339,
14
- "eval_loss": 1.2418967485427856,
15
- "eval_runtime": 545.576,
16
- "eval_samples_per_second": 248.431,
17
- "eval_steps_per_second": 0.971,
18
  "step": 400
19
- },
20
- {
21
- "epoch": 0.05,
22
- "learning_rate": 9.994805876180483e-06,
23
- "loss": 1.4969,
24
- "step": 500
25
- },
26
- {
27
- "epoch": 0.08,
28
- "eval_accuracy": 0.741024657291682,
29
- "eval_f1": 0.7346338559187492,
30
- "eval_loss": 1.0078158378601074,
31
- "eval_runtime": 547.2541,
32
- "eval_samples_per_second": 247.669,
33
- "eval_steps_per_second": 0.968,
34
- "step": 800
35
- },
36
- {
37
- "epoch": 0.1,
38
- "learning_rate": 9.9895592864638e-06,
39
- "loss": 1.0807,
40
- "step": 1000
41
- },
42
- {
43
- "epoch": 0.13,
44
- "eval_accuracy": 0.7675633401702844,
45
- "eval_f1": 0.7670752144773497,
46
- "eval_loss": 0.9420500993728638,
47
- "eval_runtime": 547.3649,
48
- "eval_samples_per_second": 247.619,
49
- "eval_steps_per_second": 0.968,
50
- "step": 1200
51
- },
52
- {
53
- "epoch": 0.16,
54
- "learning_rate": 9.984323189926549e-06,
55
- "loss": 0.9987,
56
- "step": 1500
57
- },
58
- {
59
- "epoch": 0.17,
60
- "eval_accuracy": 0.7890333338252004,
61
- "eval_f1": 0.785083992813201,
62
- "eval_loss": 0.900003969669342,
63
- "eval_runtime": 547.6023,
64
- "eval_samples_per_second": 247.512,
65
- "eval_steps_per_second": 0.968,
66
- "step": 1600
67
- },
68
- {
69
- "epoch": 0.21,
70
- "learning_rate": 9.979129066107032e-06,
71
- "loss": 0.9487,
72
- "step": 2000
73
- },
74
- {
75
- "epoch": 0.21,
76
- "eval_accuracy": 0.8020112440791513,
77
- "eval_f1": 0.8005216483655947,
78
- "eval_loss": 0.871986448764801,
79
- "eval_runtime": 1177.1431,
80
- "eval_samples_per_second": 115.141,
81
- "eval_steps_per_second": 0.45,
82
- "step": 2000
83
- },
84
- {
85
- "epoch": 0.25,
86
- "eval_accuracy": 0.8086809603210907,
87
- "eval_f1": 0.8075228822632605,
88
- "eval_loss": 0.8556529879570007,
89
- "eval_runtime": 1163.7229,
90
- "eval_samples_per_second": 116.469,
91
- "eval_steps_per_second": 0.455,
92
- "step": 2400
93
- },
94
- {
95
- "epoch": 0.26,
96
- "learning_rate": 9.973966421825813e-06,
97
- "loss": 0.9126,
98
- "step": 2500
99
- },
100
- {
101
- "epoch": 0.29,
102
- "eval_accuracy": 0.8189585208576192,
103
- "eval_f1": 0.8165624760835816,
104
- "eval_loss": 0.8341620564460754,
105
- "eval_runtime": 553.1292,
106
- "eval_samples_per_second": 245.039,
107
- "eval_steps_per_second": 0.958,
108
- "step": 2800
109
- },
110
- {
111
- "epoch": 0.31,
112
- "learning_rate": 9.968730325288564e-06,
113
- "loss": 0.9058,
114
- "step": 3000
115
- },
116
- {
117
- "epoch": 0.34,
118
- "eval_accuracy": 0.8222122209269725,
119
- "eval_f1": 0.8218673977332275,
120
- "eval_loss": 0.828486442565918,
121
- "eval_runtime": 553.2621,
122
- "eval_samples_per_second": 244.98,
123
- "eval_steps_per_second": 0.958,
124
- "step": 3200
125
- },
126
- {
127
- "epoch": 0.37,
128
- "learning_rate": 9.963483735571879e-06,
129
- "loss": 0.8814,
130
- "step": 3500
131
- },
132
- {
133
- "epoch": 0.38,
134
- "eval_accuracy": 0.8263365255500302,
135
- "eval_f1": 0.8258586186708066,
136
- "eval_loss": 0.8168812990188599,
137
- "eval_runtime": 552.7858,
138
- "eval_samples_per_second": 245.191,
139
- "eval_steps_per_second": 0.959,
140
- "step": 3600
141
- },
142
- {
143
- "epoch": 0.42,
144
- "learning_rate": 9.958247639034628e-06,
145
- "loss": 0.8691,
146
- "step": 4000
147
- },
148
- {
149
- "epoch": 0.42,
150
- "eval_accuracy": 0.8314199707831014,
151
- "eval_f1": 0.831654438222622,
152
- "eval_loss": 0.8018559813499451,
153
- "eval_runtime": 553.5512,
154
- "eval_samples_per_second": 244.852,
155
- "eval_steps_per_second": 0.957,
156
- "step": 4000
157
- },
158
- {
159
- "epoch": 0.46,
160
- "eval_accuracy": 0.8344965987398368,
161
- "eval_f1": 0.8342882973172308,
162
- "eval_loss": 0.7967696189880371,
163
- "eval_runtime": 553.3959,
164
- "eval_samples_per_second": 244.921,
165
- "eval_steps_per_second": 0.958,
166
- "step": 4400
167
- },
168
- {
169
- "epoch": 0.47,
170
- "learning_rate": 9.953043022035678e-06,
171
- "loss": 0.8562,
172
- "step": 4500
173
- },
174
- {
175
- "epoch": 0.5,
176
- "eval_accuracy": 0.8366657321194056,
177
- "eval_f1": 0.835771965342758,
178
- "eval_loss": 0.791612982749939,
179
- "eval_runtime": 553.559,
180
- "eval_samples_per_second": 244.848,
181
- "eval_steps_per_second": 0.957,
182
- "step": 4800
183
- },
184
- {
185
- "epoch": 0.52,
186
- "learning_rate": 9.947869884575027e-06,
187
- "loss": 0.8383,
188
- "step": 5000
189
- },
190
- {
191
- "epoch": 0.55,
192
- "eval_accuracy": 0.8397792500996031,
193
- "eval_f1": 0.8395630994192902,
194
- "eval_loss": 0.780993640422821,
195
- "eval_runtime": 1166.8266,
196
- "eval_samples_per_second": 116.16,
197
- "eval_steps_per_second": 0.454,
198
- "step": 5200
199
- },
200
- {
201
- "epoch": 0.58,
202
- "learning_rate": 9.942665267576076e-06,
203
- "loss": 0.8322,
204
- "step": 5500
205
- },
206
- {
207
- "epoch": 0.59,
208
- "eval_accuracy": 0.8412179610146232,
209
- "eval_f1": 0.8414414317356113,
210
- "eval_loss": 0.7786160707473755,
211
- "eval_runtime": 1185.712,
212
- "eval_samples_per_second": 114.309,
213
- "eval_steps_per_second": 0.447,
214
- "step": 5600
215
- },
216
- {
217
- "epoch": 0.63,
218
- "learning_rate": 9.937492130115425e-06,
219
- "loss": 0.8246,
220
- "step": 6000
221
- },
222
- {
223
- "epoch": 0.63,
224
- "eval_accuracy": 0.8436600805678113,
225
- "eval_f1": 0.8442808035668997,
226
- "eval_loss": 0.7705245614051819,
227
- "eval_runtime": 1162.0224,
228
- "eval_samples_per_second": 116.64,
229
- "eval_steps_per_second": 0.456,
230
- "step": 6000
231
- },
232
- {
233
- "epoch": 0.68,
234
- "learning_rate": 9.932308499475342e-06,
235
- "loss": 0.8176,
236
- "step": 6500
237
- },
238
- {
239
- "epoch": 0.69,
240
- "eval_accuracy": 0.8434092284082693,
241
- "eval_f1": 0.8442125530374853,
242
- "eval_loss": 0.7737083435058594,
243
- "eval_runtime": 1178.0184,
244
- "eval_samples_per_second": 115.056,
245
- "eval_steps_per_second": 0.45,
246
- "step": 6600
247
- },
248
- {
249
- "epoch": 0.73,
250
- "learning_rate": 9.927114375655824e-06,
251
- "loss": 0.8124,
252
- "step": 7000
253
- },
254
- {
255
- "epoch": 0.76,
256
- "eval_accuracy": 0.8492673641340436,
257
- "eval_f1": 0.8495316142296381,
258
- "eval_loss": 0.7571645379066467,
259
- "eval_runtime": 1161.9628,
260
- "eval_samples_per_second": 116.646,
261
- "eval_steps_per_second": 0.456,
262
- "step": 7200
263
- },
264
- {
265
- "epoch": 0.79,
266
- "learning_rate": 9.921920251836308e-06,
267
- "loss": 0.8052,
268
- "step": 7500
269
- },
270
- {
271
- "epoch": 0.8,
272
- "eval_accuracy": 0.8490829140167333,
273
- "eval_f1": 0.8487398376232175,
274
- "eval_loss": 0.7558021545410156,
275
- "eval_runtime": 599.2259,
276
- "eval_samples_per_second": 226.188,
277
- "eval_steps_per_second": 0.884,
278
- "step": 7600
279
- },
280
- {
281
- "epoch": 0.84,
282
- "learning_rate": 9.916684155299056e-06,
283
- "loss": 0.8027,
284
- "step": 8000
285
- },
286
- {
287
- "epoch": 0.84,
288
- "eval_accuracy": 0.8517094836872316,
289
- "eval_f1": 0.8520268181955559,
290
- "eval_loss": 0.7500344514846802,
291
- "eval_runtime": 599.1188,
292
- "eval_samples_per_second": 226.229,
293
- "eval_steps_per_second": 0.885,
294
- "step": 8000
295
- },
296
- {
297
- "epoch": 0.88,
298
- "eval_accuracy": 0.8508462571382195,
299
- "eval_f1": 0.8516035377346363,
300
- "eval_loss": 0.7509218454360962,
301
- "eval_runtime": 599.0341,
302
- "eval_samples_per_second": 226.261,
303
- "eval_steps_per_second": 0.885,
304
- "step": 8400
305
  }
306
  ],
307
  "max_steps": 953000,
308
  "num_train_epochs": 100,
309
- "total_flos": 3.4554660161659546e+17,
310
  "trial_name": null,
311
  "trial_params": null
312
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.04197271773347324,
5
+ "global_step": 400,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.04,
12
+ "eval_accuracy": 0.7687216869069928,
13
+ "eval_f1": 0.7665424923390772,
14
+ "eval_loss": 0.9364227652549744,
15
+ "eval_runtime": 684.6631,
16
+ "eval_samples_per_second": 197.963,
17
+ "eval_steps_per_second": 0.774,
18
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  }
20
  ],
21
  "max_steps": 953000,
22
  "num_train_epochs": 100,
23
+ "total_flos": 1.696937333370624e+16,
24
  "trial_name": null,
25
  "trial_params": null
26
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea41fd91ff10f125f04655f13d7342a4a6d46b520f2bf021943ae174d4b760bb
3
  size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:830ad87020514b7aac2f6ee4fe46c6c863455341fec1d90ca196628ce27b82af
3
  size 3963
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47de52e58786b8b98181cbc186cb69933573acd6041e65760d03400d3c901a9f
3
- size 2242911029
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88eafde7549bbcb49017355eaeb21a46d8f90e1af6a6b1611cf95b3efd4e3a31
3
+ size 2268278205
runs/Jun18_23-02-20_bb321ddc67eb/events.out.tfevents.1687130449.bb321ddc67eb.435.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abf98e8f702e5b3a2fae161117f29939a683b8449198803d4cc342d19c74e215
3
+ size 4721
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ea41fd91ff10f125f04655f13d7342a4a6d46b520f2bf021943ae174d4b760bb
3
  size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:830ad87020514b7aac2f6ee4fe46c6c863455341fec1d90ca196628ce27b82af
3
  size 3963