chizhik committed on
Commit
c0e3ff5
·
1 Parent(s): 7aa425b

updated model weights

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/mnt/beegfs/mc000051/CERPLES/Models/bert-base-spanish-wwm-cased",
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
 
1
  {
2
+ "_name_or_path": "Models/bert-base-spanish-wwm-cased",
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d31a759ec106786b7d14b9db5903a8d1fc4094c5aba3dfa09c784b33bcb11b8
3
  size 879021981
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d0f5d55679c9649feef087aac6e35c24e2ba7740b6451422c9da4c0b3fe0e15
3
  size 879021981
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd2df4b7c392b8d623f36a4e2ef75af9a85e57170bab256fcca5ab95d80e9cf7
3
  size 439523757
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14a497b1de3b9690a8def05e6522dc138ae0d30fe5d8b23d86922d491a3c206c
3
  size 439523757
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f01fef0c59d4860e25106e718f35edb1ef106ebaa4b62dfd95f8e7e4cf4e146e
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1c5ffa6dfdfce8b9b2e8b1186fa1255461514df45bfa6889a547ccb5f4f00d9
3
  size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:392a982a9ac8e9d0e956548d877a9d8bcc8f61fbe896d0a05495f841534dd993
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf40301a5f58872f87f1d2e7590a284756e863760c4895c4f9a55b272e36c722
3
  size 623
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": false, "name_or_path": "/mnt/beegfs/mc000051/CERPLES/Models/bert-base-spanish-wwm-cased", "do_basic_tokenize": true, "never_split": null, "model_max_length": 512, "special_tokens_map_file": "/mnt/beegfs/mc000051/CERPLES/Models/bert-base-spanish-wwm-cased/special_tokens_map.json", "tokenizer_class": "BertTokenizer"}
 
1
+ {"do_lower_case": false, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "tokenize_chinese_chars": true, "strip_accents": false, "name_or_path": "Models/bert-base-spanish-wwm-cased", "do_basic_tokenize": true, "never_split": null, "model_max_length": 512, "special_tokens_map_file": "Models/bert-base-spanish-wwm-cased/special_tokens_map.json", "tokenizer_class": "BertTokenizer"}
trainer_state.json CHANGED
@@ -1,156 +1,340 @@
1
  {
2
- "best_metric": 0.6235998688300035,
3
- "best_model_checkpoint": "./CARES/checkpoints/bert-bas/checkpoint-3500",
4
- "epoch": 24.822695035460992,
5
- "global_step": 3500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 3.55,
12
  "learning_rate": 2.8936170212765956e-05,
13
- "loss": 0.2014,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 3.55,
18
- "eval_loss": 0.15534088015556335,
19
- "eval_macro_f1": 0.34191398087233876,
20
- "eval_macro_precision": 0.4575725498723576,
21
- "eval_macro_recall": 0.32634682637580636,
22
- "eval_micro_f1": 0.7120380534211489,
23
- "eval_micro_precision": 0.8108333333333333,
24
- "eval_micro_recall": 0.634703196347032,
25
- "eval_runtime": 2.9867,
26
- "eval_samples_per_second": 323.432,
27
- "eval_steps_per_second": 20.424,
28
- "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  },
30
  {
31
  "epoch": 7.09,
32
  "learning_rate": 2.7872340425531914e-05,
33
- "loss": 0.0909,
34
  "step": 1000
35
  },
36
  {
37
- "epoch": 7.09,
38
- "eval_loss": 0.14328321814537048,
39
- "eval_macro_f1": 0.5216600519885811,
40
- "eval_macro_precision": 0.6582415980876382,
41
- "eval_macro_recall": 0.45829582028075544,
42
- "eval_micro_f1": 0.7527932960893854,
43
- "eval_micro_precision": 0.8099173553719008,
44
- "eval_micro_recall": 0.7031963470319634,
45
- "eval_runtime": 2.9852,
46
- "eval_samples_per_second": 323.592,
47
- "eval_steps_per_second": 20.434,
48
- "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  },
50
  {
51
  "epoch": 10.64,
52
  "learning_rate": 2.6808510638297873e-05,
53
- "loss": 0.0428,
54
  "step": 1500
55
  },
56
  {
57
- "epoch": 10.64,
58
- "eval_loss": 0.1534910351037979,
59
- "eval_macro_f1": 0.5762696837727688,
60
- "eval_macro_precision": 0.6633765574994241,
61
- "eval_macro_recall": 0.5217654870639399,
62
- "eval_micro_f1": 0.7763426240652617,
63
- "eval_micro_precision": 0.8105039034776437,
64
- "eval_micro_recall": 0.7449445531637312,
65
- "eval_runtime": 2.9874,
66
- "eval_samples_per_second": 323.358,
67
- "eval_steps_per_second": 20.419,
68
- "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  },
70
  {
71
  "epoch": 14.18,
72
  "learning_rate": 2.574468085106383e-05,
73
- "loss": 0.0231,
74
  "step": 2000
75
  },
76
  {
77
- "epoch": 14.18,
78
- "eval_loss": 0.15115897357463837,
79
- "eval_macro_f1": 0.5809807709622608,
80
- "eval_macro_precision": 0.6166814111331129,
81
- "eval_macro_recall": 0.5608937720056061,
82
- "eval_micro_f1": 0.7775925308436145,
83
- "eval_micro_precision": 0.7953615279672579,
84
- "eval_micro_recall": 0.7606001304631441,
85
- "eval_runtime": 2.985,
86
- "eval_samples_per_second": 323.621,
87
- "eval_steps_per_second": 20.436,
88
- "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  },
90
  {
91
  "epoch": 17.73,
92
  "learning_rate": 2.4680851063829786e-05,
93
- "loss": 0.0137,
94
  "step": 2500
95
  },
96
  {
97
- "epoch": 17.73,
98
- "eval_loss": 0.16502933204174042,
99
- "eval_macro_f1": 0.5991629446392395,
100
- "eval_macro_precision": 0.6221331811310129,
101
- "eval_macro_recall": 0.5815882875339234,
102
- "eval_micro_f1": 0.7877187190491911,
103
- "eval_micro_precision": 0.7974598930481284,
104
- "eval_micro_recall": 0.7782126549249837,
105
- "eval_runtime": 2.9854,
106
- "eval_samples_per_second": 323.576,
107
- "eval_steps_per_second": 20.433,
108
- "step": 2500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  },
110
  {
111
- "epoch": 21.28,
112
- "learning_rate": 2.3617021276595744e-05,
113
- "loss": 0.0087,
114
- "step": 3000
115
- },
116
- {
117
- "epoch": 21.28,
118
- "eval_loss": 0.1669953465461731,
119
- "eval_macro_f1": 0.6173151275498924,
120
- "eval_macro_precision": 0.6814989884732782,
121
- "eval_macro_recall": 0.5992296363093449,
122
- "eval_micro_f1": 0.7861842105263159,
123
- "eval_micro_precision": 0.7929661579296616,
124
- "eval_micro_recall": 0.7795172863666014,
125
- "eval_runtime": 2.9877,
126
- "eval_samples_per_second": 323.321,
127
- "eval_steps_per_second": 20.417,
128
- "step": 3000
129
- },
130
- {
131
- "epoch": 24.82,
132
- "learning_rate": 2.2553191489361703e-05,
133
- "loss": 0.0063,
134
- "step": 3500
135
- },
136
- {
137
- "epoch": 24.82,
138
- "eval_loss": 0.18081951141357422,
139
- "eval_macro_f1": 0.6235998688300035,
140
- "eval_macro_precision": 0.6667425956441049,
141
- "eval_macro_recall": 0.5993735670848622,
142
- "eval_micro_f1": 0.7835120643431636,
143
- "eval_micro_precision": 0.8056512749827704,
144
- "eval_micro_recall": 0.7625570776255708,
145
- "eval_runtime": 2.9803,
146
- "eval_samples_per_second": 324.127,
147
- "eval_steps_per_second": 20.468,
148
- "step": 3500
149
  }
150
  ],
151
  "max_steps": 14100,
152
  "num_train_epochs": 100,
153
- "total_flos": 1.4624642507081568e+16,
154
  "trial_name": null,
155
  "trial_params": null
156
  }
 
1
  {
2
+ "best_metric": 0.58822980593355,
3
+ "best_model_checkpoint": "./CARES/checkpoints/bert-ba/checkpoint-2961",
4
+ "epoch": 21.0,
5
+ "global_step": 2961,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_loss": 0.22213496267795563,
13
+ "eval_macro_f1": 0.08913506250646834,
14
+ "eval_macro_precision": 0.1601542788476893,
15
+ "eval_macro_recall": 0.07453283930073998,
16
+ "eval_micro_f1": 0.3941888619854721,
17
+ "eval_micro_precision": 0.7650375939849624,
18
+ "eval_micro_recall": 0.2654924983692107,
19
+ "eval_runtime": 3.0004,
20
+ "eval_samples_per_second": 321.956,
21
+ "eval_steps_per_second": 20.331,
22
+ "step": 141
23
+ },
24
+ {
25
+ "epoch": 2.0,
26
+ "eval_loss": 0.18322525918483734,
27
+ "eval_macro_f1": 0.20542764030139582,
28
+ "eval_macro_precision": 0.31153358140506404,
29
+ "eval_macro_recall": 0.1749941105591186,
30
+ "eval_micro_f1": 0.6042759177087536,
31
+ "eval_micro_precision": 0.7917547568710359,
32
+ "eval_micro_recall": 0.4885844748858447,
33
+ "eval_runtime": 3.0066,
34
+ "eval_samples_per_second": 321.295,
35
+ "eval_steps_per_second": 20.289,
36
+ "step": 282
37
+ },
38
+ {
39
+ "epoch": 3.0,
40
+ "eval_loss": 0.168878972530365,
41
+ "eval_macro_f1": 0.29169004336353355,
42
+ "eval_macro_precision": 0.39228947650947077,
43
+ "eval_macro_recall": 0.2624222179377449,
44
+ "eval_micro_f1": 0.6591789310611929,
45
+ "eval_micro_precision": 0.8112488083889419,
46
+ "eval_micro_recall": 0.5551206784083497,
47
+ "eval_runtime": 3.0155,
48
+ "eval_samples_per_second": 320.35,
49
+ "eval_steps_per_second": 20.229,
50
+ "step": 423
51
+ },
52
  {
53
  "epoch": 3.55,
54
  "learning_rate": 2.8936170212765956e-05,
55
+ "loss": 0.1959,
56
  "step": 500
57
  },
58
  {
59
+ "epoch": 4.0,
60
+ "eval_loss": 0.1501942127943039,
61
+ "eval_macro_f1": 0.35589775613243146,
62
+ "eval_macro_precision": 0.4667524057986162,
63
+ "eval_macro_recall": 0.3166674518971492,
64
+ "eval_micro_f1": 0.7155012948575658,
65
+ "eval_micro_precision": 0.8264957264957264,
66
+ "eval_micro_recall": 0.6307893020221788,
67
+ "eval_runtime": 3.0213,
68
+ "eval_samples_per_second": 319.733,
69
+ "eval_steps_per_second": 20.19,
70
+ "step": 564
71
+ },
72
+ {
73
+ "epoch": 5.0,
74
+ "eval_loss": 0.14672552049160004,
75
+ "eval_macro_f1": 0.47974247836953543,
76
+ "eval_macro_precision": 0.6466753641860046,
77
+ "eval_macro_recall": 0.42420722655395326,
78
+ "eval_micro_f1": 0.7180585296216989,
79
+ "eval_micro_precision": 0.7927501970055162,
80
+ "eval_micro_recall": 0.6562296151337247,
81
+ "eval_runtime": 3.0023,
82
+ "eval_samples_per_second": 321.752,
83
+ "eval_steps_per_second": 20.318,
84
+ "step": 705
85
+ },
86
+ {
87
+ "epoch": 6.0,
88
+ "eval_loss": 0.14830899238586426,
89
+ "eval_macro_f1": 0.4652554310103303,
90
+ "eval_macro_precision": 0.6251017145329125,
91
+ "eval_macro_recall": 0.4150412963066584,
92
+ "eval_micro_f1": 0.7256140350877194,
93
+ "eval_micro_precision": 0.7851176917236142,
94
+ "eval_micro_recall": 0.6744944553163731,
95
+ "eval_runtime": 2.9724,
96
+ "eval_samples_per_second": 324.991,
97
+ "eval_steps_per_second": 20.522,
98
+ "step": 846
99
+ },
100
+ {
101
+ "epoch": 7.0,
102
+ "eval_loss": 0.1467733532190323,
103
+ "eval_macro_f1": 0.5097890383483837,
104
+ "eval_macro_precision": 0.6555313836604214,
105
+ "eval_macro_recall": 0.4437744774893355,
106
+ "eval_micro_f1": 0.7510431154381085,
107
+ "eval_micro_precision": 0.8041697691734921,
108
+ "eval_micro_recall": 0.7045009784735812,
109
+ "eval_runtime": 2.9801,
110
+ "eval_samples_per_second": 324.15,
111
+ "eval_steps_per_second": 20.469,
112
+ "step": 987
113
  },
114
  {
115
  "epoch": 7.09,
116
  "learning_rate": 2.7872340425531914e-05,
117
+ "loss": 0.0904,
118
  "step": 1000
119
  },
120
  {
121
+ "epoch": 8.0,
122
+ "eval_loss": 0.14951790869235992,
123
+ "eval_macro_f1": 0.5411089496886734,
124
+ "eval_macro_precision": 0.6577005724429985,
125
+ "eval_macro_recall": 0.47852188216570773,
126
+ "eval_micro_f1": 0.7489539748953974,
127
+ "eval_micro_precision": 0.8044943820224719,
128
+ "eval_micro_recall": 0.700587084148728,
129
+ "eval_runtime": 2.9774,
130
+ "eval_samples_per_second": 324.441,
131
+ "eval_steps_per_second": 20.487,
132
+ "step": 1128
133
+ },
134
+ {
135
+ "epoch": 9.0,
136
+ "eval_loss": 0.1488533467054367,
137
+ "eval_macro_f1": 0.5590641407878995,
138
+ "eval_macro_precision": 0.6102853068447548,
139
+ "eval_macro_recall": 0.5269868274318028,
140
+ "eval_micro_f1": 0.755420054200542,
141
+ "eval_micro_precision": 0.7857646229739254,
142
+ "eval_micro_recall": 0.7273320287018917,
143
+ "eval_runtime": 2.9723,
144
+ "eval_samples_per_second": 324.996,
145
+ "eval_steps_per_second": 20.523,
146
+ "step": 1269
147
+ },
148
+ {
149
+ "epoch": 10.0,
150
+ "eval_loss": 0.15144900977611542,
151
+ "eval_macro_f1": 0.5597430867412742,
152
+ "eval_macro_precision": 0.6112400331236583,
153
+ "eval_macro_recall": 0.5287805393050224,
154
+ "eval_micro_f1": 0.7608550434201737,
155
+ "eval_micro_precision": 0.7796030116358659,
156
+ "eval_micro_recall": 0.7429876060013046,
157
+ "eval_runtime": 2.9735,
158
+ "eval_samples_per_second": 324.869,
159
+ "eval_steps_per_second": 20.515,
160
+ "step": 1410
161
  },
162
  {
163
  "epoch": 10.64,
164
  "learning_rate": 2.6808510638297873e-05,
165
+ "loss": 0.0424,
166
  "step": 1500
167
  },
168
  {
169
+ "epoch": 11.0,
170
+ "eval_loss": 0.15306253731250763,
171
+ "eval_macro_f1": 0.5476291897617486,
172
+ "eval_macro_precision": 0.6042943362522458,
173
+ "eval_macro_recall": 0.5150958586698227,
174
+ "eval_micro_f1": 0.7621293800539084,
175
+ "eval_micro_precision": 0.7881533101045296,
176
+ "eval_micro_recall": 0.7377690802348337,
177
+ "eval_runtime": 2.9994,
178
+ "eval_samples_per_second": 322.068,
179
+ "eval_steps_per_second": 20.338,
180
+ "step": 1551
181
+ },
182
+ {
183
+ "epoch": 12.0,
184
+ "eval_loss": 0.15226905047893524,
185
+ "eval_macro_f1": 0.5645136859707334,
186
+ "eval_macro_precision": 0.6250404048441249,
187
+ "eval_macro_recall": 0.5290350408209084,
188
+ "eval_micro_f1": 0.7750586657727119,
189
+ "eval_micro_precision": 0.7972413793103448,
190
+ "eval_micro_recall": 0.7540769732550554,
191
+ "eval_runtime": 2.9853,
192
+ "eval_samples_per_second": 323.581,
193
+ "eval_steps_per_second": 20.433,
194
+ "step": 1692
195
+ },
196
+ {
197
+ "epoch": 13.0,
198
+ "eval_loss": 0.15537378191947937,
199
+ "eval_macro_f1": 0.5801357636140765,
200
+ "eval_macro_precision": 0.6557226559864182,
201
+ "eval_macro_recall": 0.5300773127347133,
202
+ "eval_micro_f1": 0.7715736040609137,
203
+ "eval_micro_precision": 0.8016877637130801,
204
+ "eval_micro_recall": 0.7436399217221135,
205
+ "eval_runtime": 4.9377,
206
+ "eval_samples_per_second": 195.637,
207
+ "eval_steps_per_second": 12.354,
208
+ "step": 1833
209
+ },
210
+ {
211
+ "epoch": 14.0,
212
+ "eval_loss": 0.15421901643276215,
213
+ "eval_macro_f1": 0.5773329417808055,
214
+ "eval_macro_precision": 0.6485175319800287,
215
+ "eval_macro_recall": 0.5350306451399538,
216
+ "eval_micro_f1": 0.77552400270453,
217
+ "eval_micro_precision": 0.8049122807017544,
218
+ "eval_micro_recall": 0.7482061317677756,
219
+ "eval_runtime": 3.0097,
220
+ "eval_samples_per_second": 320.96,
221
+ "eval_steps_per_second": 20.268,
222
+ "step": 1974
223
  },
224
  {
225
  "epoch": 14.18,
226
  "learning_rate": 2.574468085106383e-05,
227
+ "loss": 0.0219,
228
  "step": 2000
229
  },
230
  {
231
+ "epoch": 15.0,
232
+ "eval_loss": 0.15735264122486115,
233
+ "eval_macro_f1": 0.5661412601117408,
234
+ "eval_macro_precision": 0.5937031922346153,
235
+ "eval_macro_recall": 0.5467428221378843,
236
+ "eval_micro_f1": 0.7738255033557048,
237
+ "eval_micro_precision": 0.796821008984105,
238
+ "eval_micro_recall": 0.7521200260926288,
239
+ "eval_runtime": 2.9854,
240
+ "eval_samples_per_second": 323.576,
241
+ "eval_steps_per_second": 20.433,
242
+ "step": 2115
243
+ },
244
+ {
245
+ "epoch": 16.0,
246
+ "eval_loss": 0.1617126762866974,
247
+ "eval_macro_f1": 0.5686269653829785,
248
+ "eval_macro_precision": 0.6170850785135864,
249
+ "eval_macro_recall": 0.5368268361090598,
250
+ "eval_micro_f1": 0.779524924723988,
251
+ "eval_micro_precision": 0.8001373626373627,
252
+ "eval_micro_recall": 0.7599478147423353,
253
+ "eval_runtime": 2.9891,
254
+ "eval_samples_per_second": 323.178,
255
+ "eval_steps_per_second": 20.408,
256
+ "step": 2256
257
+ },
258
+ {
259
+ "epoch": 17.0,
260
+ "eval_loss": 0.16439199447631836,
261
+ "eval_macro_f1": 0.5741609962540627,
262
+ "eval_macro_precision": 0.6203354824423237,
263
+ "eval_macro_recall": 0.5508776094197674,
264
+ "eval_micro_f1": 0.7765251989389921,
265
+ "eval_micro_precision": 0.7896156439649359,
266
+ "eval_micro_recall": 0.7638617090671885,
267
+ "eval_runtime": 2.979,
268
+ "eval_samples_per_second": 324.271,
269
+ "eval_steps_per_second": 20.477,
270
+ "step": 2397
271
  },
272
  {
273
  "epoch": 17.73,
274
  "learning_rate": 2.4680851063829786e-05,
275
+ "loss": 0.013,
276
  "step": 2500
277
  },
278
  {
279
+ "epoch": 18.0,
280
+ "eval_loss": 0.16467925906181335,
281
+ "eval_macro_f1": 0.5828461745953328,
282
+ "eval_macro_precision": 0.6374781102044266,
283
+ "eval_macro_recall": 0.5491770837420844,
284
+ "eval_micro_f1": 0.7797519275896748,
285
+ "eval_micro_precision": 0.8020689655172414,
286
+ "eval_micro_recall": 0.7586431833007176,
287
+ "eval_runtime": 2.9765,
288
+ "eval_samples_per_second": 324.537,
289
+ "eval_steps_per_second": 20.494,
290
+ "step": 2538
291
+ },
292
+ {
293
+ "epoch": 19.0,
294
+ "eval_loss": 0.17651152610778809,
295
+ "eval_macro_f1": 0.578653206536447,
296
+ "eval_macro_precision": 0.5940445993831301,
297
+ "eval_macro_recall": 0.5738532439728641,
298
+ "eval_micro_f1": 0.7715868361029651,
299
+ "eval_micro_precision": 0.7708333333333334,
300
+ "eval_micro_recall": 0.7723418134377038,
301
+ "eval_runtime": 2.9833,
302
+ "eval_samples_per_second": 323.805,
303
+ "eval_steps_per_second": 20.447,
304
+ "step": 2679
305
+ },
306
+ {
307
+ "epoch": 20.0,
308
+ "eval_loss": 0.17114990949630737,
309
+ "eval_macro_f1": 0.5779704862006727,
310
+ "eval_macro_precision": 0.6285916131953179,
311
+ "eval_macro_recall": 0.5455200342016094,
312
+ "eval_micro_f1": 0.7783711615487315,
313
+ "eval_micro_precision": 0.7969924812030075,
314
+ "eval_micro_recall": 0.7606001304631441,
315
+ "eval_runtime": 2.9774,
316
+ "eval_samples_per_second": 324.449,
317
+ "eval_steps_per_second": 20.488,
318
+ "step": 2820
319
  },
320
  {
321
+ "epoch": 21.0,
322
+ "eval_loss": 0.1727105975151062,
323
+ "eval_macro_f1": 0.58822980593355,
324
+ "eval_macro_precision": 0.6578174885933932,
325
+ "eval_macro_recall": 0.5467783967982416,
326
+ "eval_micro_f1": 0.782282793867121,
327
+ "eval_micro_precision": 0.818830242510699,
328
+ "eval_micro_recall": 0.7488584474885844,
329
+ "eval_runtime": 2.9806,
330
+ "eval_samples_per_second": 324.101,
331
+ "eval_steps_per_second": 20.466,
332
+ "step": 2961
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  }
334
  ],
335
  "max_steps": 14100,
336
  "num_train_epochs": 100,
337
+ "total_flos": 1.2360197339395008e+16,
338
  "trial_name": null,
339
  "trial_params": null
340
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b32d80d5d5dce67c9a2a11756aa0d57c15b20ca460c1ef2d87c1f7b006d31804
3
- size 3247
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:438b946a390e0a080e0951e1d1cf250efa3125664bf79c3baf90d4c23e353fcb
3
+ size 3183