mapama247 commited on
Commit
5873ab0
·
1 Parent(s): b0e006a

upload ipc_level0 model

Browse files
config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "../models/roberta-large/",
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
+ "finetuning_task": "ipc0",
10
+ "gradient_checkpointing": false,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 1024,
14
+ "id2label": {
15
+ "0": "A",
16
+ "1": "B",
17
+ "2": "C",
18
+ "3": "D",
19
+ "4": "E",
20
+ "5": "F",
21
+ "6": "G",
22
+ "7": "H"
23
+ },
24
+ "initializer_range": 0.02,
25
+ "intermediate_size": 4096,
26
+ "label2id": {
27
+ "A": 0,
28
+ "B": 1,
29
+ "C": 2,
30
+ "D": 3,
31
+ "E": 4,
32
+ "F": 5,
33
+ "G": 6,
34
+ "H": 7
35
+ },
36
+ "layer_norm_eps": 1e-05,
37
+ "max_position_embeddings": 514,
38
+ "model_type": "roberta",
39
+ "num_attention_heads": 16,
40
+ "num_hidden_layers": 24,
41
+ "pad_token_id": 1,
42
+ "position_embedding_type": "absolute",
43
+ "problem_type": "multi_label_classification",
44
+ "transformers_version": "4.6.1",
45
+ "type_vocab_size": 1,
46
+ "use_cache": true,
47
+ "vocab_size": 50265
48
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0f179b15fc1ed7e8b57544150935f52c02d380a1ab401567979987615bd3524
3
+ size 1421635885
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:79c6c475f7f9e1eedf88fa4fee6a35cdb392d2f8258ac19db5995b78fd175097
3
+ size 17563
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "special_tokens_map_file": null, "name_or_path": "../models/roberta-large/"}
trainer_state.json ADDED
@@ -0,0 +1,704 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8422989695943094,
3
+ "best_model_checkpoint": "./output//roberta-large_ipc0_5__5e-5_0.01_0.06_07-20-22_16-12/checkpoint-78000",
4
+ "epoch": 0.14747043196404466,
5
+ "global_step": 86000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.0,
12
+ "learning_rate": 5.71588616241119e-07,
13
+ "loss": 0.3883,
14
+ "step": 2000
15
+ },
16
+ {
17
+ "epoch": 0.0,
18
+ "eval_accuracy": 0.5407561450712831,
19
+ "eval_f1": 0.6846357008233311,
20
+ "eval_loss": 0.2273142784833908,
21
+ "eval_roc_auc": 0.7884579701932871,
22
+ "eval_runtime": 4707.9267,
23
+ "eval_samples_per_second": 62.918,
24
+ "step": 2000
25
+ },
26
+ {
27
+ "epoch": 0.01,
28
+ "learning_rate": 1.143177232482238e-06,
29
+ "loss": 0.2054,
30
+ "step": 4000
31
+ },
32
+ {
33
+ "epoch": 0.01,
34
+ "eval_accuracy": 0.6222245478760217,
35
+ "eval_f1": 0.7599527299062029,
36
+ "eval_loss": 0.18135647475719452,
37
+ "eval_roc_auc": 0.8430138570362599,
38
+ "eval_runtime": 4704.9968,
39
+ "eval_samples_per_second": 62.957,
40
+ "step": 4000
41
+ },
42
+ {
43
+ "epoch": 0.01,
44
+ "learning_rate": 1.7147658487233569e-06,
45
+ "loss": 0.1754,
46
+ "step": 6000
47
+ },
48
+ {
49
+ "epoch": 0.01,
50
+ "eval_accuracy": 0.6469635026146725,
51
+ "eval_f1": 0.7830456416949749,
52
+ "eval_loss": 0.16404622793197632,
53
+ "eval_roc_auc": 0.858923618616464,
54
+ "eval_runtime": 4707.9088,
55
+ "eval_samples_per_second": 62.918,
56
+ "step": 6000
57
+ },
58
+ {
59
+ "epoch": 0.01,
60
+ "learning_rate": 2.286354464964476e-06,
61
+ "loss": 0.1619,
62
+ "step": 8000
63
+ },
64
+ {
65
+ "epoch": 0.01,
66
+ "eval_accuracy": 0.6548936069652581,
67
+ "eval_f1": 0.7882942239180966,
68
+ "eval_loss": 0.15830162167549133,
69
+ "eval_roc_auc": 0.8607893437318509,
70
+ "eval_runtime": 4704.8509,
71
+ "eval_samples_per_second": 62.959,
72
+ "step": 8000
73
+ },
74
+ {
75
+ "epoch": 0.02,
76
+ "learning_rate": 2.857943081205595e-06,
77
+ "loss": 0.1536,
78
+ "step": 10000
79
+ },
80
+ {
81
+ "epoch": 0.02,
82
+ "eval_accuracy": 0.6733600483435906,
83
+ "eval_f1": 0.8012577006532831,
84
+ "eval_loss": 0.14840050041675568,
85
+ "eval_roc_auc": 0.8665618320388239,
86
+ "eval_runtime": 4704.9917,
87
+ "eval_samples_per_second": 62.957,
88
+ "step": 10000
89
+ },
90
+ {
91
+ "epoch": 0.02,
92
+ "learning_rate": 3.4295316974467138e-06,
93
+ "loss": 0.1479,
94
+ "step": 12000
95
+ },
96
+ {
97
+ "epoch": 0.02,
98
+ "eval_accuracy": 0.6778433087001583,
99
+ "eval_f1": 0.8084837754895617,
100
+ "eval_loss": 0.14521972835063934,
101
+ "eval_roc_auc": 0.8744209349192423,
102
+ "eval_runtime": 4700.2006,
103
+ "eval_samples_per_second": 63.021,
104
+ "step": 12000
105
+ },
106
+ {
107
+ "epoch": 0.02,
108
+ "learning_rate": 4.001120313687832e-06,
109
+ "loss": 0.1433,
110
+ "step": 14000
111
+ },
112
+ {
113
+ "epoch": 0.02,
114
+ "eval_accuracy": 0.6842576119211513,
115
+ "eval_f1": 0.8146811536296887,
116
+ "eval_loss": 0.13989576697349548,
117
+ "eval_roc_auc": 0.8784560258941168,
118
+ "eval_runtime": 4713.9153,
119
+ "eval_samples_per_second": 62.838,
120
+ "step": 14000
121
+ },
122
+ {
123
+ "epoch": 0.03,
124
+ "learning_rate": 4.572708929928952e-06,
125
+ "loss": 0.1398,
126
+ "step": 16000
127
+ },
128
+ {
129
+ "epoch": 0.03,
130
+ "eval_accuracy": 0.6886058343151719,
131
+ "eval_f1": 0.8160765004171728,
132
+ "eval_loss": 0.13773857057094574,
133
+ "eval_roc_auc": 0.877459307877016,
134
+ "eval_runtime": 4705.5648,
135
+ "eval_samples_per_second": 62.95,
136
+ "step": 16000
137
+ },
138
+ {
139
+ "epoch": 0.03,
140
+ "learning_rate": 5.144297546170071e-06,
141
+ "loss": 0.1371,
142
+ "step": 18000
143
+ },
144
+ {
145
+ "epoch": 0.03,
146
+ "eval_accuracy": 0.6879238926043084,
147
+ "eval_f1": 0.8203506019576413,
148
+ "eval_loss": 0.13596272468566895,
149
+ "eval_roc_auc": 0.8839289957852754,
150
+ "eval_runtime": 4711.3568,
151
+ "eval_samples_per_second": 62.872,
152
+ "step": 18000
153
+ },
154
+ {
155
+ "epoch": 0.03,
156
+ "learning_rate": 5.71588616241119e-06,
157
+ "loss": 0.1348,
158
+ "step": 20000
159
+ },
160
+ {
161
+ "epoch": 0.03,
162
+ "eval_accuracy": 0.6958573729039577,
163
+ "eval_f1": 0.824050131286724,
164
+ "eval_loss": 0.13410066068172455,
165
+ "eval_roc_auc": 0.8851249727718851,
166
+ "eval_runtime": 4700.4765,
167
+ "eval_samples_per_second": 63.018,
168
+ "step": 20000
169
+ },
170
+ {
171
+ "epoch": 0.04,
172
+ "learning_rate": 6.287474778652308e-06,
173
+ "loss": 0.1337,
174
+ "step": 22000
175
+ },
176
+ {
177
+ "epoch": 0.04,
178
+ "eval_accuracy": 0.6954286273728701,
179
+ "eval_f1": 0.8228504902698741,
180
+ "eval_loss": 0.13327716290950775,
181
+ "eval_roc_auc": 0.8827479207014749,
182
+ "eval_runtime": 4730.8966,
183
+ "eval_samples_per_second": 62.612,
184
+ "step": 22000
185
+ },
186
+ {
187
+ "epoch": 0.04,
188
+ "learning_rate": 6.8590633948934276e-06,
189
+ "loss": 0.1331,
190
+ "step": 24000
191
+ },
192
+ {
193
+ "epoch": 0.04,
194
+ "eval_accuracy": 0.6971469854462836,
195
+ "eval_f1": 0.8237129350972603,
196
+ "eval_loss": 0.13190585374832153,
197
+ "eval_roc_auc": 0.883230389109756,
198
+ "eval_runtime": 4692.6605,
199
+ "eval_samples_per_second": 63.123,
200
+ "step": 24000
201
+ },
202
+ {
203
+ "epoch": 0.04,
204
+ "learning_rate": 7.4306520111345465e-06,
205
+ "loss": 0.1309,
206
+ "step": 26000
207
+ },
208
+ {
209
+ "epoch": 0.04,
210
+ "eval_accuracy": 0.7027004216560381,
211
+ "eval_f1": 0.8271447851497717,
212
+ "eval_loss": 0.13012564182281494,
213
+ "eval_roc_auc": 0.8860201435878601,
214
+ "eval_runtime": 4762.2583,
215
+ "eval_samples_per_second": 62.2,
216
+ "step": 26000
217
+ },
218
+ {
219
+ "epoch": 0.05,
220
+ "learning_rate": 8.002240627375665e-06,
221
+ "loss": 0.1298,
222
+ "step": 28000
223
+ },
224
+ {
225
+ "epoch": 0.05,
226
+ "eval_accuracy": 0.7013432901324385,
227
+ "eval_f1": 0.8245749680737388,
228
+ "eval_loss": 0.12999016046524048,
229
+ "eval_roc_auc": 0.8808593693278481,
230
+ "eval_runtime": 4730.1652,
231
+ "eval_samples_per_second": 62.622,
232
+ "step": 28000
233
+ },
234
+ {
235
+ "epoch": 0.05,
236
+ "learning_rate": 8.573829243616784e-06,
237
+ "loss": 0.1284,
238
+ "step": 30000
239
+ },
240
+ {
241
+ "epoch": 0.05,
242
+ "eval_accuracy": 0.7064072137279592,
243
+ "eval_f1": 0.8300594309848033,
244
+ "eval_loss": 0.12789227068424225,
245
+ "eval_roc_auc": 0.887462059567475,
246
+ "eval_runtime": 4709.67,
247
+ "eval_samples_per_second": 62.895,
248
+ "step": 30000
249
+ },
250
+ {
251
+ "epoch": 0.05,
252
+ "learning_rate": 9.145417859857904e-06,
253
+ "loss": 0.1294,
254
+ "step": 32000
255
+ },
256
+ {
257
+ "epoch": 0.05,
258
+ "eval_accuracy": 0.7096987640650478,
259
+ "eval_f1": 0.8326160218501911,
260
+ "eval_loss": 0.12640197575092316,
261
+ "eval_roc_auc": 0.8885914760719662,
262
+ "eval_runtime": 4696.3704,
263
+ "eval_samples_per_second": 63.073,
264
+ "step": 32000
265
+ },
266
+ {
267
+ "epoch": 0.06,
268
+ "learning_rate": 9.717006476099022e-06,
269
+ "loss": 0.1281,
270
+ "step": 34000
271
+ },
272
+ {
273
+ "epoch": 0.06,
274
+ "eval_accuracy": 0.7070351402538039,
275
+ "eval_f1": 0.8319073213351634,
276
+ "eval_loss": 0.12621602416038513,
277
+ "eval_roc_auc": 0.8896376613488131,
278
+ "eval_runtime": 4698.8191,
279
+ "eval_samples_per_second": 63.04,
280
+ "step": 34000
281
+ },
282
+ {
283
+ "epoch": 0.06,
284
+ "learning_rate": 1.0288595092340142e-05,
285
+ "loss": 0.1266,
286
+ "step": 36000
287
+ },
288
+ {
289
+ "epoch": 0.06,
290
+ "eval_accuracy": 0.7081255718013727,
291
+ "eval_f1": 0.8335308031653823,
292
+ "eval_loss": 0.12684929370880127,
293
+ "eval_roc_auc": 0.8919053253578205,
294
+ "eval_runtime": 4698.6474,
295
+ "eval_samples_per_second": 63.042,
296
+ "step": 36000
297
+ },
298
+ {
299
+ "epoch": 0.07,
300
+ "learning_rate": 1.086018370858126e-05,
301
+ "loss": 0.1264,
302
+ "step": 38000
303
+ },
304
+ {
305
+ "epoch": 0.07,
306
+ "eval_accuracy": 0.7080074135841439,
307
+ "eval_f1": 0.8339326805263629,
308
+ "eval_loss": 0.12483926117420197,
309
+ "eval_roc_auc": 0.8912626509356554,
310
+ "eval_runtime": 4703.9737,
311
+ "eval_samples_per_second": 62.971,
312
+ "step": 38000
313
+ },
314
+ {
315
+ "epoch": 0.07,
316
+ "learning_rate": 1.143177232482238e-05,
317
+ "loss": 0.1252,
318
+ "step": 40000
319
+ },
320
+ {
321
+ "epoch": 0.07,
322
+ "eval_accuracy": 0.7134258118313511,
323
+ "eval_f1": 0.8343130408901507,
324
+ "eval_loss": 0.12493231147527695,
325
+ "eval_roc_auc": 0.8898269175762451,
326
+ "eval_runtime": 4715.1654,
327
+ "eval_samples_per_second": 62.821,
328
+ "step": 40000
329
+ },
330
+ {
331
+ "epoch": 0.07,
332
+ "learning_rate": 1.20033609410635e-05,
333
+ "loss": 0.1253,
334
+ "step": 42000
335
+ },
336
+ {
337
+ "epoch": 0.07,
338
+ "eval_accuracy": 0.7114036183422064,
339
+ "eval_f1": 0.8363716860622692,
340
+ "eval_loss": 0.12403804063796997,
341
+ "eval_roc_auc": 0.8935152243226613,
342
+ "eval_runtime": 4721.8512,
343
+ "eval_samples_per_second": 62.732,
344
+ "step": 42000
345
+ },
346
+ {
347
+ "epoch": 0.08,
348
+ "learning_rate": 1.2574949557304616e-05,
349
+ "loss": 0.1241,
350
+ "step": 44000
351
+ },
352
+ {
353
+ "epoch": 0.08,
354
+ "eval_accuracy": 0.7098405539257223,
355
+ "eval_f1": 0.8373545897529787,
356
+ "eval_loss": 0.12371223419904709,
357
+ "eval_roc_auc": 0.8963795632569544,
358
+ "eval_runtime": 4728.9925,
359
+ "eval_samples_per_second": 62.638,
360
+ "step": 44000
361
+ },
362
+ {
363
+ "epoch": 0.08,
364
+ "learning_rate": 1.3146538173545735e-05,
365
+ "loss": 0.1238,
366
+ "step": 46000
367
+ },
368
+ {
369
+ "epoch": 0.08,
370
+ "eval_accuracy": 0.7124602903991385,
371
+ "eval_f1": 0.8370424197096473,
372
+ "eval_loss": 0.12373730540275574,
373
+ "eval_roc_auc": 0.8944393753665816,
374
+ "eval_runtime": 4701.3381,
375
+ "eval_samples_per_second": 63.006,
376
+ "step": 46000
377
+ },
378
+ {
379
+ "epoch": 0.08,
380
+ "learning_rate": 1.3718126789786855e-05,
381
+ "loss": 0.1238,
382
+ "step": 48000
383
+ },
384
+ {
385
+ "epoch": 0.08,
386
+ "eval_accuracy": 0.7092024995526868,
387
+ "eval_f1": 0.8358656658561511,
388
+ "eval_loss": 0.125064879655838,
389
+ "eval_roc_auc": 0.895248662380464,
390
+ "eval_runtime": 4691.6911,
391
+ "eval_samples_per_second": 63.136,
392
+ "step": 48000
393
+ },
394
+ {
395
+ "epoch": 0.09,
396
+ "learning_rate": 1.4289715406027973e-05,
397
+ "loss": 0.1228,
398
+ "step": 50000
399
+ },
400
+ {
401
+ "epoch": 0.09,
402
+ "eval_accuracy": 0.7139119484965211,
403
+ "eval_f1": 0.8380575940983299,
404
+ "eval_loss": 0.12305936962366104,
405
+ "eval_roc_auc": 0.8943202372027479,
406
+ "eval_runtime": 4708.5133,
407
+ "eval_samples_per_second": 62.91,
408
+ "step": 50000
409
+ },
410
+ {
411
+ "epoch": 0.09,
412
+ "learning_rate": 1.4861304022269093e-05,
413
+ "loss": 0.123,
414
+ "step": 52000
415
+ },
416
+ {
417
+ "epoch": 0.09,
418
+ "eval_accuracy": 0.7159408938837931,
419
+ "eval_f1": 0.8405024216747294,
420
+ "eval_loss": 0.12146713584661484,
421
+ "eval_roc_auc": 0.8976514718844044,
422
+ "eval_runtime": 4705.5754,
423
+ "eval_samples_per_second": 62.949,
424
+ "step": 52000
425
+ },
426
+ {
427
+ "epoch": 0.09,
428
+ "learning_rate": 1.5432892638510214e-05,
429
+ "loss": 0.123,
430
+ "step": 54000
431
+ },
432
+ {
433
+ "epoch": 0.09,
434
+ "eval_accuracy": 0.7114069942912701,
435
+ "eval_f1": 0.8382327921361191,
436
+ "eval_loss": 0.12516580522060394,
437
+ "eval_roc_auc": 0.8973778420243191,
438
+ "eval_runtime": 4690.4939,
439
+ "eval_samples_per_second": 63.152,
440
+ "step": 54000
441
+ },
442
+ {
443
+ "epoch": 0.1,
444
+ "learning_rate": 1.600448125475133e-05,
445
+ "loss": 0.1222,
446
+ "step": 56000
447
+ },
448
+ {
449
+ "epoch": 0.1,
450
+ "eval_accuracy": 0.7157248331437175,
451
+ "eval_f1": 0.8389102808480002,
452
+ "eval_loss": 0.1224837601184845,
453
+ "eval_roc_auc": 0.8953561781764309,
454
+ "eval_runtime": 4699.8836,
455
+ "eval_samples_per_second": 63.026,
456
+ "step": 56000
457
+ },
458
+ {
459
+ "epoch": 0.1,
460
+ "learning_rate": 1.657606987099245e-05,
461
+ "loss": 0.1222,
462
+ "step": 58000
463
+ },
464
+ {
465
+ "epoch": 0.1,
466
+ "eval_accuracy": 0.7158564951572011,
467
+ "eval_f1": 0.8399927583173271,
468
+ "eval_loss": 0.12127628922462463,
469
+ "eval_roc_auc": 0.8965841568131228,
470
+ "eval_runtime": 4698.6002,
471
+ "eval_samples_per_second": 63.043,
472
+ "step": 58000
473
+ },
474
+ {
475
+ "epoch": 0.1,
476
+ "learning_rate": 1.714765848723357e-05,
477
+ "loss": 0.1219,
478
+ "step": 60000
479
+ },
480
+ {
481
+ "epoch": 0.1,
482
+ "eval_accuracy": 0.7170279494822982,
483
+ "eval_f1": 0.8398695690490878,
484
+ "eval_loss": 0.12054365128278732,
485
+ "eval_roc_auc": 0.8949977373242076,
486
+ "eval_runtime": 4766.0245,
487
+ "eval_samples_per_second": 62.151,
488
+ "step": 60000
489
+ },
490
+ {
491
+ "epoch": 0.11,
492
+ "learning_rate": 1.7719247103474687e-05,
493
+ "loss": 0.1217,
494
+ "step": 62000
495
+ },
496
+ {
497
+ "epoch": 0.11,
498
+ "eval_accuracy": 0.7173587924905389,
499
+ "eval_f1": 0.8384044315001998,
500
+ "eval_loss": 0.12145062536001205,
501
+ "eval_roc_auc": 0.8928348976264423,
502
+ "eval_runtime": 4726.8677,
503
+ "eval_samples_per_second": 62.666,
504
+ "step": 62000
505
+ },
506
+ {
507
+ "epoch": 0.11,
508
+ "learning_rate": 1.8290835719715808e-05,
509
+ "loss": 0.1212,
510
+ "step": 64000
511
+ },
512
+ {
513
+ "epoch": 0.11,
514
+ "eval_accuracy": 0.7183074341774331,
515
+ "eval_f1": 0.8391874180865005,
516
+ "eval_loss": 0.12094008922576904,
517
+ "eval_roc_auc": 0.8939367915265275,
518
+ "eval_runtime": 4721.7671,
519
+ "eval_samples_per_second": 62.734,
520
+ "step": 64000
521
+ },
522
+ {
523
+ "epoch": 0.11,
524
+ "learning_rate": 1.8862424335956926e-05,
525
+ "loss": 0.1217,
526
+ "step": 66000
527
+ },
528
+ {
529
+ "epoch": 0.11,
530
+ "eval_accuracy": 0.7173655443886663,
531
+ "eval_f1": 0.8390122732329008,
532
+ "eval_loss": 0.12199072539806366,
533
+ "eval_roc_auc": 0.8938961477593685,
534
+ "eval_runtime": 4689.9026,
535
+ "eval_samples_per_second": 63.16,
536
+ "step": 66000
537
+ },
538
+ {
539
+ "epoch": 0.12,
540
+ "learning_rate": 1.9434012952198044e-05,
541
+ "loss": 0.1211,
542
+ "step": 68000
543
+ },
544
+ {
545
+ "epoch": 0.12,
546
+ "eval_accuracy": 0.7192932113040278,
547
+ "eval_f1": 0.8406356467928897,
548
+ "eval_loss": 0.12074108421802521,
549
+ "eval_roc_auc": 0.8959624591328685,
550
+ "eval_runtime": 4689.6506,
551
+ "eval_samples_per_second": 63.163,
552
+ "step": 68000
553
+ },
554
+ {
555
+ "epoch": 0.12,
556
+ "learning_rate": 2.0005601568439166e-05,
557
+ "loss": 0.1218,
558
+ "step": 70000
559
+ },
560
+ {
561
+ "epoch": 0.12,
562
+ "eval_accuracy": 0.718013726608893,
563
+ "eval_f1": 0.8394379496338936,
564
+ "eval_loss": 0.1212056577205658,
565
+ "eval_roc_auc": 0.8946202640469076,
566
+ "eval_runtime": 4729.1038,
567
+ "eval_samples_per_second": 62.636,
568
+ "step": 70000
569
+ },
570
+ {
571
+ "epoch": 0.12,
572
+ "learning_rate": 2.0577190184680284e-05,
573
+ "loss": 0.1214,
574
+ "step": 72000
575
+ },
576
+ {
577
+ "epoch": 0.12,
578
+ "eval_accuracy": 0.7165721963587013,
579
+ "eval_f1": 0.8417356069559346,
580
+ "eval_loss": 0.12099753320217133,
581
+ "eval_roc_auc": 0.8992088434208416,
582
+ "eval_runtime": 4735.2667,
583
+ "eval_samples_per_second": 62.555,
584
+ "step": 72000
585
+ },
586
+ {
587
+ "epoch": 0.13,
588
+ "learning_rate": 2.11487788009214e-05,
589
+ "loss": 0.1208,
590
+ "step": 74000
591
+ },
592
+ {
593
+ "epoch": 0.13,
594
+ "eval_accuracy": 0.7096379969819016,
595
+ "eval_f1": 0.8359202990403864,
596
+ "eval_loss": 0.12574784457683563,
597
+ "eval_roc_auc": 0.8958735028365457,
598
+ "eval_runtime": 4708.3575,
599
+ "eval_samples_per_second": 62.912,
600
+ "step": 74000
601
+ },
602
+ {
603
+ "epoch": 0.13,
604
+ "learning_rate": 2.172036741716252e-05,
605
+ "loss": 0.1202,
606
+ "step": 76000
607
+ },
608
+ {
609
+ "epoch": 0.13,
610
+ "eval_accuracy": 0.7170211975841708,
611
+ "eval_f1": 0.8380538350579405,
612
+ "eval_loss": 0.12220340967178345,
613
+ "eval_roc_auc": 0.8937227496461092,
614
+ "eval_runtime": 4705.5508,
615
+ "eval_samples_per_second": 62.95,
616
+ "step": 76000
617
+ },
618
+ {
619
+ "epoch": 0.13,
620
+ "learning_rate": 2.229195603340364e-05,
621
+ "loss": 0.1211,
622
+ "step": 78000
623
+ },
624
+ {
625
+ "epoch": 0.13,
626
+ "eval_accuracy": 0.7159375179347294,
627
+ "eval_f1": 0.8422989695943094,
628
+ "eval_loss": 0.12021646648645401,
629
+ "eval_roc_auc": 0.8995460792466595,
630
+ "eval_runtime": 4706.4093,
631
+ "eval_samples_per_second": 62.938,
632
+ "step": 78000
633
+ },
634
+ {
635
+ "epoch": 0.14,
636
+ "learning_rate": 2.286354464964476e-05,
637
+ "loss": 0.1212,
638
+ "step": 80000
639
+ },
640
+ {
641
+ "epoch": 0.14,
642
+ "eval_accuracy": 0.7161333229804229,
643
+ "eval_f1": 0.8413767292128633,
644
+ "eval_loss": 0.12168838083744049,
645
+ "eval_roc_auc": 0.8989878842731752,
646
+ "eval_runtime": 4758.053,
647
+ "eval_samples_per_second": 62.255,
648
+ "step": 80000
649
+ },
650
+ {
651
+ "epoch": 0.14,
652
+ "learning_rate": 2.3435133265885877e-05,
653
+ "loss": 0.1211,
654
+ "step": 82000
655
+ },
656
+ {
657
+ "epoch": 0.14,
658
+ "eval_accuracy": 0.7183985848021525,
659
+ "eval_f1": 0.8372888052842701,
660
+ "eval_loss": 0.12127470970153809,
661
+ "eval_roc_auc": 0.891376589798138,
662
+ "eval_runtime": 4718.8962,
663
+ "eval_samples_per_second": 62.772,
664
+ "step": 82000
665
+ },
666
+ {
667
+ "epoch": 0.14,
668
+ "learning_rate": 2.4006721882127e-05,
669
+ "loss": 0.1208,
670
+ "step": 84000
671
+ },
672
+ {
673
+ "epoch": 0.14,
674
+ "eval_accuracy": 0.7164675419377272,
675
+ "eval_f1": 0.8410196283399609,
676
+ "eval_loss": 0.12072896212339401,
677
+ "eval_roc_auc": 0.8975580711258158,
678
+ "eval_runtime": 4700.4599,
679
+ "eval_samples_per_second": 63.018,
680
+ "step": 84000
681
+ },
682
+ {
683
+ "epoch": 0.15,
684
+ "learning_rate": 2.4578310498368117e-05,
685
+ "loss": 0.1207,
686
+ "step": 86000
687
+ },
688
+ {
689
+ "epoch": 0.15,
690
+ "eval_accuracy": 0.7182635468396052,
691
+ "eval_f1": 0.8404677971481037,
692
+ "eval_loss": 0.12209167331457138,
693
+ "eval_roc_auc": 0.8963189679938305,
694
+ "eval_runtime": 4690.2466,
695
+ "eval_samples_per_second": 63.155,
696
+ "step": 86000
697
+ }
698
+ ],
699
+ "max_steps": 2915835,
700
+ "num_train_epochs": 5,
701
+ "total_flos": 1.39736361467904e+17,
702
+ "trial_name": null,
703
+ "trial_params": null
704
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ede448aac65db0a1c8da70bed695e971ec99a3b7595b7373bc24bebb8232e59
3
+ size 2479
vocab.json ADDED
The diff for this file is too large to render. See raw diff