Augusto777 committed
Commit 280c73c · verified · 1 Parent(s): 6d6bda6

End of training
README.md ADDED
@@ -0,0 +1,111 @@
+ ---
+ base_model: MBZUAI/swiftformer-xs
+ tags:
+ - generated_from_trainer
+ datasets:
+ - imagefolder
+ metrics:
+ - accuracy
+ model-index:
+ - name: swiftformer-xs-DMAE
+   results:
+   - task:
+       name: Image Classification
+       type: image-classification
+     dataset:
+       name: imagefolder
+       type: imagefolder
+       config: default
+       split: validation
+       args: default
+     metrics:
+     - name: Accuracy
+       type: accuracy
+       value: 0.10869565217391304
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # swiftformer-xs-DMAE
+
+ This model is a fine-tuned version of [MBZUAI/swiftformer-xs](https://huggingface.co/MBZUAI/swiftformer-xs) on the imagefolder dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 113.9563
+ - Accuracy: 0.1087
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training (see the sketch after this list):
+ - learning_rate: 0.00015
+ - train_batch_size: 16
+ - eval_batch_size: 16
+ - seed: 42
+ - gradient_accumulation_steps: 4
+ - total_train_batch_size: 64
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 40
+
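For reference, a minimal sketch of how the values above could be expressed with the `transformers` `Trainer` API. The training script is not part of this commit, so everything beyond the listed values (output directory, evaluation strategy, dataset loading, and the rest of the pipeline) is an assumption.

```python
from transformers import TrainingArguments

# Hypothetical reconstruction of the run configuration; only the numeric values
# mirror the hyperparameter list above. Output dir and eval strategy are assumed.
training_args = TrainingArguments(
    output_dir="swiftformer-xs-DMAE",
    learning_rate=1.5e-4,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    gradient_accumulation_steps=4,   # 16 x 4 = 64 total train batch size
    num_train_epochs=40,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    seed=42,
    logging_steps=10,                # matches trainer_state.json below
    evaluation_strategy="epoch",     # assumed from the per-epoch eval entries
)
```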
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Accuracy |
+ |:-------------:|:-----:|:----:|:---------------:|:--------:|
+ | No log | 0.86 | 3 | 113.9563 | 0.1087 |
+ | No log | 2.0 | 7 | 113.9524 | 0.1087 |
+ | 114.2271 | 2.86 | 10 | 113.9454 | 0.1087 |
+ | 114.2271 | 4.0 | 14 | 113.9389 | 0.1087 |
+ | 114.2271 | 4.86 | 17 | 113.9226 | 0.1087 |
+ | 113.5566 | 6.0 | 21 | 113.8982 | 0.1087 |
+ | 113.5566 | 6.86 | 24 | 113.8425 | 0.1087 |
+ | 113.5566 | 8.0 | 28 | 113.7478 | 0.1087 |
+ | 111.6907 | 8.86 | 31 | 113.6538 | 0.1087 |
+ | 111.6907 | 10.0 | 35 | 113.5589 | 0.1087 |
+ | 111.6907 | 10.86 | 38 | 113.5002 | 0.1087 |
+ | 115.67 | 12.0 | 42 | 113.4496 | 0.1087 |
+ | 115.67 | 12.86 | 45 | 113.3752 | 0.1087 |
+ | 115.67 | 14.0 | 49 | 113.2129 | 0.1087 |
+ | 111.8054 | 14.86 | 52 | 113.0828 | 0.1087 |
+ | 111.8054 | 16.0 | 56 | 112.8805 | 0.1087 |
+ | 111.8054 | 16.86 | 59 | 112.9013 | 0.1087 |
+ | 112.3831 | 18.0 | 63 | 112.8123 | 0.1087 |
+ | 112.3831 | 18.86 | 66 | 113.0190 | 0.1087 |
+ | 113.1097 | 20.0 | 70 | 113.2929 | 0.1087 |
+ | 113.1097 | 20.86 | 73 | 112.8861 | 0.1087 |
+ | 113.1097 | 22.0 | 77 | 112.7154 | 0.1087 |
+ | 113.3674 | 22.86 | 80 | 112.6943 | 0.1087 |
+ | 113.3674 | 24.0 | 84 | 112.3937 | 0.1087 |
+ | 113.3674 | 24.86 | 87 | 112.3862 | 0.1087 |
+ | 113.1472 | 26.0 | 91 | 112.2693 | 0.1087 |
+ | 113.1472 | 26.86 | 94 | 112.3107 | 0.1087 |
+ | 113.1472 | 28.0 | 98 | 112.4216 | 0.1087 |
+ | 111.3252 | 28.86 | 101 | 112.3318 | 0.1087 |
+ | 111.3252 | 30.0 | 105 | 112.3517 | 0.1087 |
+ | 111.3252 | 30.86 | 108 | 112.4213 | 0.1087 |
+ | 112.827 | 32.0 | 112 | 112.4838 | 0.1087 |
+ | 112.827 | 32.86 | 115 | 112.4490 | 0.1087 |
+ | 112.827 | 34.0 | 119 | 112.1525 | 0.1087 |
+ | 112.5631 | 34.29 | 120 | 112.1956 | 0.1087 |
+
+
+ ### Framework versions
+
+ - Transformers 4.36.2
+ - Pytorch 2.1.2+cu118
+ - Datasets 2.16.1
+ - Tokenizers 0.15.0
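Once pushed, the checkpoint can be loaded for inference. A minimal sketch, assuming the repository id is `Augusto777/swiftformer-xs-DMAE` (inferred from the committer and model name) and using `fundus.jpg` as a placeholder image path; given the reported 0.1087 validation accuracy, treat the predictions accordingly.

```python
import torch
from PIL import Image
from transformers import AutoImageProcessor, AutoModelForImageClassification

repo_id = "Augusto777/swiftformer-xs-DMAE"  # assumed repo id

processor = AutoImageProcessor.from_pretrained(repo_id)
model = AutoModelForImageClassification.from_pretrained(repo_id)

image = Image.open("fundus.jpg")  # placeholder path to a local retinal image
inputs = processor(images=image, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits

pred = logits.argmax(-1).item()
print(model.config.id2label[pred])  # one of: avanzada, leve, moderada, no dmae
```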
all_results.json ADDED
@@ -0,0 +1,12 @@
+ {
+     "epoch": 34.29,
+     "eval_accuracy": 0.10869565217391304,
+     "eval_loss": 113.95629119873047,
+     "eval_runtime": 0.6942,
+     "eval_samples_per_second": 66.266,
+     "eval_steps_per_second": 4.322,
+     "train_loss": 112.97269897460937,
+     "train_runtime": 134.324,
+     "train_samples_per_second": 63.429,
+     "train_steps_per_second": 0.893
+ }
config.json ADDED
@@ -0,0 +1,50 @@
+ {
+   "_name_or_path": "MBZUAI/swiftformer-xs",
+   "architectures": [
+     "SwiftFormerForImageClassification"
+   ],
+   "batch_norm_eps": 1e-05,
+   "depths": [
+     3,
+     3,
+     6,
+     4
+   ],
+   "down_pad": 1,
+   "down_patch_size": 3,
+   "down_stride": 2,
+   "downsamples": [
+     true,
+     true,
+     true,
+     true
+   ],
+   "drop_path_rate": 0.0,
+   "embed_dims": [
+     48,
+     56,
+     112,
+     220
+   ],
+   "hidden_act": "gelu",
+   "id2label": {
+     "0": "avanzada",
+     "1": "leve",
+     "2": "moderada",
+     "3": "no dmae"
+   },
+   "label2id": {
+     "avanzada": 0,
+     "leve": 1,
+     "moderada": 2,
+     "no dmae": 3
+   },
+   "layer_scale_init_value": 1e-05,
+   "mlp_ratio": 4,
+   "model_type": "swiftformer",
+   "num_channels": 3,
+   "problem_type": "single_label_classification",
+   "torch_dtype": "float32",
+   "transformers_version": "4.36.2",
+   "use_layer_scale": true
+ }
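The config describes the SwiftFormer-XS backbone (four stages with depths 3/3/6/4 and widths 48/56/112/220) and a 4-class head whose labels are DMAE (age-related macular degeneration) severity grades in Spanish. A sketch of rebuilding that architecture from these fields, with randomly initialised weights rather than the trained checkpoint:

```python
from transformers import SwiftFormerConfig, SwiftFormerForImageClassification

# Values copied from config.json above; everything else keeps library defaults.
config = SwiftFormerConfig(
    depths=[3, 3, 6, 4],
    embed_dims=[48, 56, 112, 220],
    mlp_ratio=4,
    id2label={0: "avanzada", 1: "leve", 2: "moderada", 3: "no dmae"},
    label2id={"avanzada": 0, "leve": 1, "moderada": 2, "no dmae": 3},
)
model = SwiftFormerForImageClassification(config)

# Roughly 3M parameters, consistent with the ~12 MB safetensors file below.
print(sum(p.numel() for p in model.parameters()))
```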
eval_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 34.29,
+     "eval_accuracy": 0.10869565217391304,
+     "eval_loss": 113.95629119873047,
+     "eval_runtime": 0.6942,
+     "eval_samples_per_second": 66.266,
+     "eval_steps_per_second": 4.322
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8fc36f501fe972e0a07bf8fed8813c2c5211b54394de8eaa01f7bad55db3aae1
+ size 12203648
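The weights themselves live in Git LFS, so the commit only records this pointer (sha256 plus a size of about 12 MB). A sketch for fetching and inspecting the tensors, again under the assumed repo id:

```python
from huggingface_hub import hf_hub_download
from safetensors import safe_open

# Repo id is an assumption; the filename matches this commit.
path = hf_hub_download("Augusto777/swiftformer-xs-DMAE", "model.safetensors")

with safe_open(path, framework="pt") as f:
    total = sum(f.get_tensor(name).numel() for name in f.keys())

print(f"{total:,} parameters")  # ~3.05M (12,203,648 bytes / 4 bytes per float32)
```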
preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
+ {
+   "do_normalize": true,
+   "do_rescale": true,
+   "do_resize": true,
+   "image_mean": [
+     0.485,
+     0.456,
+     0.406
+   ],
+   "image_processor_type": "ViTImageProcessor",
+   "image_std": [
+     0.229,
+     0.224,
+     0.225
+   ],
+   "resample": 2,
+   "rescale_factor": 0.00392156862745098,
+   "size": {
+     "height": 224,
+     "width": 224
+   }
+ }
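These settings resize inputs to 224×224 (bilinear, `resample: 2`), rescale pixel values by 1/255, and normalise with the ImageNet mean/std. `AutoImageProcessor.from_pretrained(...)` applies all of this automatically; the torchvision pipeline below is only an equivalent sketch for illustration, with a placeholder image path.

```python
from PIL import Image
from torchvision import transforms

# Mirrors preprocessor_config.json: resize -> rescale to [0, 1] -> ImageNet normalisation.
preprocess = transforms.Compose([
    transforms.Resize((224, 224), interpolation=transforms.InterpolationMode.BILINEAR),
    transforms.ToTensor(),  # converts to float and rescales by 1/255
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

pixel_values = preprocess(Image.open("fundus.jpg")).unsqueeze(0)  # shape (1, 3, 224, 224)
```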
runs/Dec01_10-42-21_DESKTOP-SKBE9FB/events.out.tfevents.1733071343.DESKTOP-SKBE9FB.11644.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4177d74506b736f9d14b6b696eeb693f9412ffd092e75c62fbd6f6340e61160f
+ size 17835
runs/Dec01_10-42-21_DESKTOP-SKBE9FB/events.out.tfevents.1733071478.DESKTOP-SKBE9FB.11644.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f5ac2946f52f34d412df781fbedc43b7d62b160c5184bcb2ba02b5c72d7e3cd
+ size 405
train_results.json ADDED
@@ -0,0 +1,7 @@
+ {
+     "epoch": 34.29,
+     "train_loss": 112.97269897460937,
+     "train_runtime": 134.324,
+     "train_samples_per_second": 63.429,
+     "train_steps_per_second": 0.893
+ }
trainer_state.json ADDED
@@ -0,0 +1,417 @@
+ {
+   "best_metric": 0.10869565217391304,
+   "best_model_checkpoint": "swiftformer-xs-DMAE\\checkpoint-3",
+   "epoch": 34.285714285714285,
+   "eval_steps": 500,
+   "global_step": 120,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.95629119873047,
+       "eval_runtime": 0.6964,
+       "eval_samples_per_second": 66.055,
+       "eval_steps_per_second": 4.308,
+       "step": 3
+     },
+     {
+       "epoch": 2.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.952392578125,
+       "eval_runtime": 0.6137,
+       "eval_samples_per_second": 74.961,
+       "eval_steps_per_second": 4.889,
+       "step": 7
+     },
+     {
+       "epoch": 2.86,
+       "learning_rate": 0.000125,
+       "loss": 114.2271,
+       "step": 10
+     },
+     {
+       "epoch": 2.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.94542694091797,
+       "eval_runtime": 0.6086,
+       "eval_samples_per_second": 75.578,
+       "eval_steps_per_second": 4.929,
+       "step": 10
+     },
+     {
+       "epoch": 4.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.93885803222656,
+       "eval_runtime": 0.6092,
+       "eval_samples_per_second": 75.515,
+       "eval_steps_per_second": 4.925,
+       "step": 14
+     },
+     {
+       "epoch": 4.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.9226303100586,
+       "eval_runtime": 0.6577,
+       "eval_samples_per_second": 69.945,
+       "eval_steps_per_second": 4.562,
+       "step": 17
+     },
+     {
+       "epoch": 5.71,
+       "learning_rate": 0.0001388888888888889,
+       "loss": 113.5566,
+       "step": 20
+     },
+     {
+       "epoch": 6.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.89823913574219,
+       "eval_runtime": 0.6091,
+       "eval_samples_per_second": 75.517,
+       "eval_steps_per_second": 4.925,
+       "step": 21
+     },
+     {
+       "epoch": 6.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.84252166748047,
+       "eval_runtime": 0.6192,
+       "eval_samples_per_second": 74.295,
+       "eval_steps_per_second": 4.845,
+       "step": 24
+     },
+     {
+       "epoch": 8.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.7478256225586,
+       "eval_runtime": 0.6437,
+       "eval_samples_per_second": 71.467,
+       "eval_steps_per_second": 4.661,
+       "step": 28
+     },
+     {
+       "epoch": 8.57,
+       "learning_rate": 0.000125,
+       "loss": 111.6907,
+       "step": 30
+     },
+     {
+       "epoch": 8.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.65383911132812,
+       "eval_runtime": 0.6171,
+       "eval_samples_per_second": 74.538,
+       "eval_steps_per_second": 4.861,
+       "step": 31
+     },
+     {
+       "epoch": 10.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.55889892578125,
+       "eval_runtime": 0.6282,
+       "eval_samples_per_second": 73.23,
+       "eval_steps_per_second": 4.776,
+       "step": 35
+     },
+     {
+       "epoch": 10.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.500244140625,
+       "eval_runtime": 0.6501,
+       "eval_samples_per_second": 70.753,
+       "eval_steps_per_second": 4.614,
+       "step": 38
+     },
+     {
+       "epoch": 11.43,
+       "learning_rate": 0.00011111111111111109,
+       "loss": 115.67,
+       "step": 40
+     },
+     {
+       "epoch": 12.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.4496078491211,
+       "eval_runtime": 0.6246,
+       "eval_samples_per_second": 73.641,
+       "eval_steps_per_second": 4.803,
+       "step": 42
+     },
+     {
+       "epoch": 12.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.37518310546875,
+       "eval_runtime": 0.6266,
+       "eval_samples_per_second": 73.406,
+       "eval_steps_per_second": 4.787,
+       "step": 45
+     },
+     {
+       "epoch": 14.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.21294403076172,
+       "eval_runtime": 0.7182,
+       "eval_samples_per_second": 64.052,
+       "eval_steps_per_second": 4.177,
+       "step": 49
+     },
+     {
+       "epoch": 14.29,
+       "learning_rate": 9.722222222222222e-05,
+       "loss": 111.8054,
+       "step": 50
+     },
+     {
+       "epoch": 14.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.0827865600586,
+       "eval_runtime": 0.6252,
+       "eval_samples_per_second": 73.582,
+       "eval_steps_per_second": 4.799,
+       "step": 52
+     },
+     {
+       "epoch": 16.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.88050842285156,
+       "eval_runtime": 0.6316,
+       "eval_samples_per_second": 72.825,
+       "eval_steps_per_second": 4.749,
+       "step": 56
+     },
+     {
+       "epoch": 16.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.90125274658203,
+       "eval_runtime": 0.6246,
+       "eval_samples_per_second": 73.642,
+       "eval_steps_per_second": 4.803,
+       "step": 59
+     },
+     {
+       "epoch": 17.14,
+       "learning_rate": 8.333333333333333e-05,
+       "loss": 112.3831,
+       "step": 60
+     },
+     {
+       "epoch": 18.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.812255859375,
+       "eval_runtime": 0.6547,
+       "eval_samples_per_second": 70.263,
+       "eval_steps_per_second": 4.582,
+       "step": 63
+     },
+     {
+       "epoch": 18.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.01899719238281,
+       "eval_runtime": 0.6161,
+       "eval_samples_per_second": 74.659,
+       "eval_steps_per_second": 4.869,
+       "step": 66
+     },
+     {
+       "epoch": 20.0,
+       "learning_rate": 6.944444444444444e-05,
+       "loss": 113.1097,
+       "step": 70
+     },
+     {
+       "epoch": 20.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 113.29285430908203,
+       "eval_runtime": 0.6286,
+       "eval_samples_per_second": 73.173,
+       "eval_steps_per_second": 4.772,
+       "step": 70
+     },
+     {
+       "epoch": 20.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.88607025146484,
+       "eval_runtime": 0.6372,
+       "eval_samples_per_second": 72.196,
+       "eval_steps_per_second": 4.708,
+       "step": 73
+     },
+     {
+       "epoch": 22.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.71536254882812,
+       "eval_runtime": 0.6587,
+       "eval_samples_per_second": 69.838,
+       "eval_steps_per_second": 4.555,
+       "step": 77
+     },
+     {
+       "epoch": 22.86,
+       "learning_rate": 5.5555555555555545e-05,
+       "loss": 113.3674,
+       "step": 80
+     },
+     {
+       "epoch": 22.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.69425964355469,
+       "eval_runtime": 0.6142,
+       "eval_samples_per_second": 74.9,
+       "eval_steps_per_second": 4.885,
+       "step": 80
+     },
+     {
+       "epoch": 24.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.39366912841797,
+       "eval_runtime": 0.6652,
+       "eval_samples_per_second": 69.156,
+       "eval_steps_per_second": 4.51,
+       "step": 84
+     },
+     {
+       "epoch": 24.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.38623809814453,
+       "eval_runtime": 0.6241,
+       "eval_samples_per_second": 73.701,
+       "eval_steps_per_second": 4.807,
+       "step": 87
+     },
+     {
+       "epoch": 25.71,
+       "learning_rate": 4.1666666666666665e-05,
+       "loss": 113.1472,
+       "step": 90
+     },
+     {
+       "epoch": 26.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.26931762695312,
+       "eval_runtime": 0.6136,
+       "eval_samples_per_second": 74.962,
+       "eval_steps_per_second": 4.889,
+       "step": 91
+     },
+     {
+       "epoch": 26.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.31065368652344,
+       "eval_runtime": 0.7012,
+       "eval_samples_per_second": 65.606,
+       "eval_steps_per_second": 4.279,
+       "step": 94
+     },
+     {
+       "epoch": 28.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.42163848876953,
+       "eval_runtime": 0.6231,
+       "eval_samples_per_second": 73.819,
+       "eval_steps_per_second": 4.814,
+       "step": 98
+     },
+     {
+       "epoch": 28.57,
+       "learning_rate": 2.7777777777777772e-05,
+       "loss": 111.3252,
+       "step": 100
+     },
+     {
+       "epoch": 28.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.33184051513672,
+       "eval_runtime": 0.6136,
+       "eval_samples_per_second": 74.962,
+       "eval_steps_per_second": 4.889,
+       "step": 101
+     },
+     {
+       "epoch": 30.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.3516616821289,
+       "eval_runtime": 0.6282,
+       "eval_samples_per_second": 73.23,
+       "eval_steps_per_second": 4.776,
+       "step": 105
+     },
+     {
+       "epoch": 30.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.42131042480469,
+       "eval_runtime": 0.6367,
+       "eval_samples_per_second": 72.253,
+       "eval_steps_per_second": 4.712,
+       "step": 108
+     },
+     {
+       "epoch": 31.43,
+       "learning_rate": 1.3888888888888886e-05,
+       "loss": 112.827,
+       "step": 110
+     },
+     {
+       "epoch": 32.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.48377227783203,
+       "eval_runtime": 0.6321,
+       "eval_samples_per_second": 72.768,
+       "eval_steps_per_second": 4.746,
+       "step": 112
+     },
+     {
+       "epoch": 32.86,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.44902801513672,
+       "eval_runtime": 0.6151,
+       "eval_samples_per_second": 74.779,
+       "eval_steps_per_second": 4.877,
+       "step": 115
+     },
+     {
+       "epoch": 34.0,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.15248107910156,
+       "eval_runtime": 0.6382,
+       "eval_samples_per_second": 72.082,
+       "eval_steps_per_second": 4.701,
+       "step": 119
+     },
+     {
+       "epoch": 34.29,
+       "learning_rate": 0.0,
+       "loss": 112.5631,
+       "step": 120
+     },
+     {
+       "epoch": 34.29,
+       "eval_accuracy": 0.10869565217391304,
+       "eval_loss": 112.195556640625,
+       "eval_runtime": 0.6527,
+       "eval_samples_per_second": 70.482,
+       "eval_steps_per_second": 4.597,
+       "step": 120
+     },
+     {
+       "epoch": 34.29,
+       "step": 120,
+       "total_flos": 2.0027429927092224e+16,
+       "train_loss": 112.97269897460937,
+       "train_runtime": 134.324,
+       "train_samples_per_second": 63.429,
+       "train_steps_per_second": 0.893
+     }
+   ],
+   "logging_steps": 10,
+   "max_steps": 120,
+   "num_input_tokens_seen": 0,
+   "num_train_epochs": 40,
+   "save_steps": 500,
+   "total_flos": 2.0027429927092224e+16,
+   "train_batch_size": 16,
+   "trial_name": null,
+   "trial_params": null
+ }
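`log_history` interleaves optimizer logs (every 10 steps) with an evaluation entry per epoch; the eval loss drifts from about 113.96 to 112.20 while accuracy stays at 0.1087 throughout. A small sketch for extracting that curve from a local copy of this file (placeholder path):

```python
import json

# Placeholder path to a local download of trainer_state.json from this commit.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only the evaluation entries and print loss/accuracy per logged epoch.
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"epoch {entry['epoch']:>6}: loss={entry['eval_loss']:.4f}  "
              f"acc={entry['eval_accuracy']:.4f}")
```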
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ca22fbcceedf6a1587f3bdfcbff82d0938662a5874b0085c375ade4d287a4278
+ size 4728