OmAlve committed on
Commit
f7edb1e
·
verified ·
1 Parent(s): 8ca1266

added preprocessor

Browse files
README.md CHANGED
@@ -18,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224) on the pcuenq/oxford-pets dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.0037
22
- - Accuracy: 0.9995
23
 
24
  ## Model description
25
 
@@ -39,8 +39,8 @@ More information needed
39
 
40
  The following hyperparameters were used during training:
41
  - learning_rate: 0.0003
42
- - train_batch_size: 64
43
- - eval_batch_size: 8
44
  - seed: 42
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: linear
@@ -51,15 +51,13 @@ The following hyperparameters were used during training:
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
53
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
54
- | 0.3546 | 0.86 | 100 | 0.1640 | 0.9545 |
55
- | 0.0922 | 1.72 | 200 | 0.0596 | 0.9850 |
56
- | 0.0421 | 2.59 | 300 | 0.0214 | 0.9939 |
57
- | 0.0087 | 3.45 | 400 | 0.0037 | 0.9995 |
58
 
59
 
60
  ### Framework versions
61
 
62
  - Transformers 4.39.2
63
- - Pytorch 2.2.1+cu121
64
- - Datasets 2.18.0
65
  - Tokenizers 0.15.2
 
18
 
19
  This model is a fine-tuned version of [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224) on the pcuenq/oxford-pets dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.0046
22
+ - Accuracy: 0.9989
23
 
24
  ## Model description
25
 
 
39
 
40
  The following hyperparameters were used during training:
41
  - learning_rate: 0.0003
42
+ - train_batch_size: 128
43
+ - eval_batch_size: 16
44
  - seed: 42
45
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
46
  - lr_scheduler_type: linear
 
51
 
52
  | Training Loss | Epoch | Step | Validation Loss | Accuracy |
53
  |:-------------:|:-----:|:----:|:---------------:|:--------:|
54
+ | 0.0887 | 1.72 | 100 | 0.0765 | 0.9774 |
55
+ | 0.0045 | 3.45 | 200 | 0.0046 | 0.9989 |
 
 
56
 
57
 
58
  ### Framework versions
59
 
60
  - Transformers 4.39.2
61
+ - Pytorch 2.1.2
62
+ - Datasets 2.16.0
63
  - Tokenizers 0.15.2
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 2.2913817801515827e+18,
4
- "train_loss": 0.18540822506223903,
5
- "train_runtime": 740.8045,
6
- "train_samples_per_second": 39.903,
7
- "train_steps_per_second": 0.626
8
  }
 
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 2.2913817801515827e+18,
4
+ "train_loss": 0.202088082896682,
5
+ "train_runtime": 854.7685,
6
+ "train_samples_per_second": 34.582,
7
+ "train_steps_per_second": 0.271
8
  }
config.json CHANGED
@@ -9,85 +9,85 @@
9
  "hidden_dropout_prob": 0.0,
10
  "hidden_size": 768,
11
  "id2label": {
12
- "0": "yorkshire terrier Dog",
13
- "1": "Birman Cat",
14
- "10": "pug Dog",
15
- "11": "Persian Cat",
16
- "12": "english setter Dog",
17
- "13": "american pit bull terrier Dog",
18
- "14": "chihuahua Dog",
19
- "15": "pomeranian Dog",
20
- "16": "great pyrenees Dog",
21
- "17": "scottish terrier Dog",
22
- "18": "miniature pinscher Dog",
23
- "19": "Sphynx Cat",
24
- "2": "german shorthaired Dog",
25
- "20": "Maine Coon Cat",
26
- "21": "saint bernard Dog",
27
- "22": "Siamese Cat",
28
- "23": "american bulldog Dog",
29
- "24": "newfoundland Dog",
30
- "25": "Abyssinian Cat",
31
- "26": "leonberger Dog",
32
- "27": "beagle Dog",
33
- "28": "Egyptian Mau Cat",
34
- "29": "boxer Dog",
35
- "3": "keeshond Dog",
36
- "30": "British Shorthair Cat",
37
- "31": "staffordshire bull terrier Dog",
38
- "32": "samoyed Dog",
39
- "33": "basset hound Dog",
40
- "34": "Bengal Cat",
41
- "35": "Bombay Cat",
42
- "36": "Russian Blue Cat",
43
- "4": "havanese Dog",
44
- "5": "wheaten terrier Dog",
45
- "6": "shiba inu Dog",
46
- "7": "Ragdoll Cat",
47
- "8": "japanese chin Dog",
48
- "9": "english cocker spaniel Dog"
49
  },
50
  "image_size": 224,
51
  "initializer_range": 0.02,
52
  "intermediate_size": 3072,
53
  "label2id": {
54
- "Abyssinian Cat": "25",
55
- "Bengal Cat": "34",
56
- "Birman Cat": "1",
57
- "Bombay Cat": "35",
58
- "British Shorthair Cat": "30",
59
- "Egyptian Mau Cat": "28",
60
- "Maine Coon Cat": "20",
61
- "Persian Cat": "11",
62
- "Ragdoll Cat": "7",
63
- "Russian Blue Cat": "36",
64
- "Siamese Cat": "22",
65
- "Sphynx Cat": "19",
66
- "american bulldog Dog": "23",
67
- "american pit bull terrier Dog": "13",
68
- "basset hound Dog": "33",
69
- "beagle Dog": "27",
70
- "boxer Dog": "29",
71
- "chihuahua Dog": "14",
72
- "english cocker spaniel Dog": "9",
73
- "english setter Dog": "12",
74
- "german shorthaired Dog": "2",
75
- "great pyrenees Dog": "16",
76
- "havanese Dog": "4",
77
- "japanese chin Dog": "8",
78
- "keeshond Dog": "3",
79
- "leonberger Dog": "26",
80
- "miniature pinscher Dog": "18",
81
- "newfoundland Dog": "24",
82
- "pomeranian Dog": "15",
83
- "pug Dog": "10",
84
- "saint bernard Dog": "21",
85
- "samoyed Dog": "32",
86
- "scottish terrier Dog": "17",
87
- "shiba inu Dog": "6",
88
- "staffordshire bull terrier Dog": "31",
89
- "wheaten terrier Dog": "5",
90
- "yorkshire terrier Dog": "0"
91
  },
92
  "layer_norm_eps": 1e-12,
93
  "model_type": "vit",
 
9
  "hidden_dropout_prob": 0.0,
10
  "hidden_size": 768,
11
  "id2label": {
12
+ "0": "pug Dog",
13
+ "1": "japanese chin Dog",
14
+ "10": "Ragdoll Cat",
15
+ "11": "scottish terrier Dog",
16
+ "12": "shiba inu Dog",
17
+ "13": "chihuahua Dog",
18
+ "14": "samoyed Dog",
19
+ "15": "Maine Coon Cat",
20
+ "16": "newfoundland Dog",
21
+ "17": "Abyssinian Cat",
22
+ "18": "saint bernard Dog",
23
+ "19": "Persian Cat",
24
+ "2": "american pit bull terrier Dog",
25
+ "20": "american bulldog Dog",
26
+ "21": "boxer Dog",
27
+ "22": "leonberger Dog",
28
+ "23": "german shorthaired Dog",
29
+ "24": "staffordshire bull terrier Dog",
30
+ "25": "Birman Cat",
31
+ "26": "english cocker spaniel Dog",
32
+ "27": "english setter Dog",
33
+ "28": "Siamese Cat",
34
+ "29": "Sphynx Cat",
35
+ "3": "beagle Dog",
36
+ "30": "Bombay Cat",
37
+ "31": "keeshond Dog",
38
+ "32": "havanese Dog",
39
+ "33": "Bengal Cat",
40
+ "34": "great pyrenees Dog",
41
+ "35": "Russian Blue Cat",
42
+ "36": "basset hound Dog",
43
+ "4": "miniature pinscher Dog",
44
+ "5": "pomeranian Dog",
45
+ "6": "yorkshire terrier Dog",
46
+ "7": "Egyptian Mau Cat",
47
+ "8": "British Shorthair Cat",
48
+ "9": "wheaten terrier Dog"
49
  },
50
  "image_size": 224,
51
  "initializer_range": 0.02,
52
  "intermediate_size": 3072,
53
  "label2id": {
54
+ "Abyssinian Cat": "17",
55
+ "Bengal Cat": "33",
56
+ "Birman Cat": "25",
57
+ "Bombay Cat": "30",
58
+ "British Shorthair Cat": "8",
59
+ "Egyptian Mau Cat": "7",
60
+ "Maine Coon Cat": "15",
61
+ "Persian Cat": "19",
62
+ "Ragdoll Cat": "10",
63
+ "Russian Blue Cat": "35",
64
+ "Siamese Cat": "28",
65
+ "Sphynx Cat": "29",
66
+ "american bulldog Dog": "20",
67
+ "american pit bull terrier Dog": "2",
68
+ "basset hound Dog": "36",
69
+ "beagle Dog": "3",
70
+ "boxer Dog": "21",
71
+ "chihuahua Dog": "13",
72
+ "english cocker spaniel Dog": "26",
73
+ "english setter Dog": "27",
74
+ "german shorthaired Dog": "23",
75
+ "great pyrenees Dog": "34",
76
+ "havanese Dog": "32",
77
+ "japanese chin Dog": "1",
78
+ "keeshond Dog": "31",
79
+ "leonberger Dog": "22",
80
+ "miniature pinscher Dog": "4",
81
+ "newfoundland Dog": "16",
82
+ "pomeranian Dog": "5",
83
+ "pug Dog": "0",
84
+ "saint bernard Dog": "18",
85
+ "samoyed Dog": "14",
86
+ "scottish terrier Dog": "11",
87
+ "shiba inu Dog": "12",
88
+ "staffordshire bull terrier Dog": "24",
89
+ "wheaten terrier Dog": "9",
90
+ "yorkshire terrier Dog": "6"
91
  },
92
  "layer_norm_eps": 1e-12,
93
  "model_type": "vit",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b2184cabd333fc0f02fa6c917e9aaec72c255443c28de6752429d9b05a440e5
3
  size 343331644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04c848cc43804867db29602b8504492e7669c8936edf4fab2780cde439ea8afb
3
  size 343331644
preprocessor_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_valid_processor_keys": [
3
+ "images",
4
+ "do_resize",
5
+ "size",
6
+ "resample",
7
+ "do_rescale",
8
+ "rescale_factor",
9
+ "do_normalize",
10
+ "image_mean",
11
+ "image_std",
12
+ "return_tensors",
13
+ "data_format",
14
+ "input_data_format"
15
+ ],
16
+ "do_normalize": true,
17
+ "do_rescale": true,
18
+ "do_resize": true,
19
+ "image_mean": [
20
+ 0.5,
21
+ 0.5,
22
+ 0.5
23
+ ],
24
+ "image_processor_type": "ViTImageProcessor",
25
+ "image_std": [
26
+ 0.5,
27
+ 0.5,
28
+ 0.5
29
+ ],
30
+ "resample": 2,
31
+ "rescale_factor": 0.00392156862745098,
32
+ "size": {
33
+ "height": 224,
34
+ "width": 224
35
+ }
36
+ }
runs/Mar30_10-54-27_5905f67ec798/events.out.tfevents.1711796077.5905f67ec798.70.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc36ebdd4905eceaa768a5d952f7b8c86dce579aecdb19b0a5d5bf054a00e34a
3
+ size 12492
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 2.2913817801515827e+18,
4
- "train_loss": 0.18540822506223903,
5
- "train_runtime": 740.8045,
6
- "train_samples_per_second": 39.903,
7
- "train_steps_per_second": 0.626
8
  }
 
1
  {
2
  "epoch": 4.0,
3
  "total_flos": 2.2913817801515827e+18,
4
+ "train_loss": 0.202088082896682,
5
+ "train_runtime": 854.7685,
6
+ "train_samples_per_second": 34.582,
7
+ "train_steps_per_second": 0.271
8
  }
trainer_state.json CHANGED
@@ -1,388 +1,209 @@
1
  {
2
- "best_metric": 0.0037413176614791155,
3
- "best_model_checkpoint": "./vit-base-pets/checkpoint-400",
4
  "epoch": 4.0,
5
  "eval_steps": 100,
6
- "global_step": 464,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
- {
12
- "epoch": 0.09,
13
- "grad_norm": 3.7365024089813232,
14
- "learning_rate": 0.00029353448275862065,
15
- "loss": 2.333,
16
- "step": 10
17
- },
18
  {
19
  "epoch": 0.17,
20
- "grad_norm": 2.494818925857544,
21
  "learning_rate": 0.0002870689655172413,
22
- "loss": 0.7182,
23
- "step": 20
24
- },
25
- {
26
- "epoch": 0.26,
27
- "grad_norm": 2.9715936183929443,
28
- "learning_rate": 0.00028060344827586205,
29
- "loss": 0.4629,
30
- "step": 30
31
  },
32
  {
33
  "epoch": 0.34,
34
- "grad_norm": 3.1811089515686035,
35
  "learning_rate": 0.0002741379310344827,
36
- "loss": 0.4464,
37
- "step": 40
38
- },
39
- {
40
- "epoch": 0.43,
41
- "grad_norm": 3.517517566680908,
42
- "learning_rate": 0.00026767241379310345,
43
- "loss": 0.3636,
44
- "step": 50
45
  },
46
  {
47
  "epoch": 0.52,
48
- "grad_norm": 1.850231647491455,
49
  "learning_rate": 0.00026120689655172413,
50
- "loss": 0.3916,
51
- "step": 60
52
- },
53
- {
54
- "epoch": 0.6,
55
- "grad_norm": 3.451078414916992,
56
- "learning_rate": 0.0002547413793103448,
57
- "loss": 0.423,
58
- "step": 70
59
  },
60
  {
61
  "epoch": 0.69,
62
- "grad_norm": 3.729478359222412,
63
  "learning_rate": 0.0002482758620689655,
64
- "loss": 0.4177,
65
- "step": 80
66
- },
67
- {
68
- "epoch": 0.78,
69
- "grad_norm": 3.4009933471679688,
70
- "learning_rate": 0.00024181034482758618,
71
- "loss": 0.3164,
72
- "step": 90
73
  },
74
  {
75
  "epoch": 0.86,
76
- "grad_norm": 2.6017422676086426,
77
  "learning_rate": 0.00023534482758620685,
78
- "loss": 0.3546,
79
- "step": 100
80
- },
81
- {
82
- "epoch": 0.86,
83
- "eval_accuracy": 0.9545331529093369,
84
- "eval_loss": 0.16398410499095917,
85
- "eval_runtime": 66.2449,
86
- "eval_samples_per_second": 111.556,
87
- "eval_steps_per_second": 13.948,
88
- "step": 100
89
- },
90
- {
91
- "epoch": 0.95,
92
- "grad_norm": 2.4210205078125,
93
- "learning_rate": 0.00022887931034482758,
94
- "loss": 0.3267,
95
- "step": 110
96
  },
97
  {
98
  "epoch": 1.03,
99
- "grad_norm": 1.4406899213790894,
100
  "learning_rate": 0.00022241379310344826,
101
- "loss": 0.2172,
102
- "step": 120
103
- },
104
- {
105
- "epoch": 1.12,
106
- "grad_norm": 1.6676567792892456,
107
- "learning_rate": 0.00021594827586206896,
108
- "loss": 0.1671,
109
- "step": 130
110
  },
111
  {
112
  "epoch": 1.21,
113
- "grad_norm": 1.7579220533370972,
114
  "learning_rate": 0.00020948275862068963,
115
- "loss": 0.1416,
116
- "step": 140
117
- },
118
- {
119
- "epoch": 1.29,
120
- "grad_norm": 0.9925330281257629,
121
- "learning_rate": 0.00020301724137931034,
122
- "loss": 0.1289,
123
- "step": 150
124
  },
125
  {
126
  "epoch": 1.38,
127
- "grad_norm": 1.1069083213806152,
128
  "learning_rate": 0.000196551724137931,
129
- "loss": 0.1199,
130
- "step": 160
131
- },
132
- {
133
- "epoch": 1.47,
134
- "grad_norm": 2.214843273162842,
135
- "learning_rate": 0.00019008620689655169,
136
- "loss": 0.1204,
137
- "step": 170
138
  },
139
  {
140
  "epoch": 1.55,
141
- "grad_norm": 2.182187080383301,
142
  "learning_rate": 0.0001836206896551724,
143
- "loss": 0.1155,
144
- "step": 180
145
- },
146
- {
147
- "epoch": 1.64,
148
- "grad_norm": 2.003282308578491,
149
- "learning_rate": 0.0001771551724137931,
150
- "loss": 0.1319,
151
- "step": 190
152
  },
153
  {
154
  "epoch": 1.72,
155
- "grad_norm": 2.0936930179595947,
156
  "learning_rate": 0.0001706896551724138,
157
- "loss": 0.0922,
158
- "step": 200
159
  },
160
  {
161
  "epoch": 1.72,
162
- "eval_accuracy": 0.984979702300406,
163
- "eval_loss": 0.05955846235156059,
164
- "eval_runtime": 66.0945,
165
- "eval_samples_per_second": 111.81,
166
- "eval_steps_per_second": 13.98,
167
- "step": 200
168
- },
169
- {
170
- "epoch": 1.81,
171
- "grad_norm": 1.8309838771820068,
172
- "learning_rate": 0.00016422413793103446,
173
- "loss": 0.1028,
174
- "step": 210
175
  },
176
  {
177
  "epoch": 1.9,
178
- "grad_norm": 1.588274598121643,
179
  "learning_rate": 0.00015775862068965517,
180
- "loss": 0.1285,
181
- "step": 220
182
- },
183
- {
184
- "epoch": 1.98,
185
- "grad_norm": 3.1869797706604004,
186
- "learning_rate": 0.00015129310344827584,
187
- "loss": 0.1091,
188
- "step": 230
189
  },
190
  {
191
  "epoch": 2.07,
192
- "grad_norm": 0.7886011004447937,
193
  "learning_rate": 0.00014482758620689654,
194
- "loss": 0.0681,
195
- "step": 240
196
- },
197
- {
198
- "epoch": 2.16,
199
- "grad_norm": 1.0175777673721313,
200
- "learning_rate": 0.00013836206896551724,
201
- "loss": 0.062,
202
- "step": 250
203
  },
204
  {
205
  "epoch": 2.24,
206
- "grad_norm": 0.5733941197395325,
207
  "learning_rate": 0.00013189655172413792,
208
- "loss": 0.0447,
209
- "step": 260
210
- },
211
- {
212
- "epoch": 2.33,
213
- "grad_norm": 0.6709829568862915,
214
- "learning_rate": 0.0001254310344827586,
215
- "loss": 0.0352,
216
- "step": 270
217
  },
218
  {
219
  "epoch": 2.41,
220
- "grad_norm": 0.10771916061639786,
221
  "learning_rate": 0.00011896551724137931,
222
- "loss": 0.0281,
223
- "step": 280
224
- },
225
- {
226
- "epoch": 2.5,
227
- "grad_norm": 0.8952343463897705,
228
- "learning_rate": 0.0001125,
229
- "loss": 0.0401,
230
- "step": 290
231
  },
232
  {
233
  "epoch": 2.59,
234
- "grad_norm": 0.05457449331879616,
235
  "learning_rate": 0.00010603448275862067,
236
- "loss": 0.0421,
237
- "step": 300
238
- },
239
- {
240
- "epoch": 2.59,
241
- "eval_accuracy": 0.9939106901217862,
242
- "eval_loss": 0.021390045061707497,
243
- "eval_runtime": 66.2486,
244
- "eval_samples_per_second": 111.55,
245
- "eval_steps_per_second": 13.947,
246
- "step": 300
247
- },
248
- {
249
- "epoch": 2.67,
250
- "grad_norm": 0.14218959212303162,
251
- "learning_rate": 9.956896551724137e-05,
252
- "loss": 0.0364,
253
- "step": 310
254
  },
255
  {
256
  "epoch": 2.76,
257
- "grad_norm": 0.8931805491447449,
258
  "learning_rate": 9.310344827586206e-05,
259
- "loss": 0.0232,
260
- "step": 320
261
- },
262
- {
263
- "epoch": 2.84,
264
- "grad_norm": 0.25261449813842773,
265
- "learning_rate": 8.663793103448275e-05,
266
- "loss": 0.0134,
267
- "step": 330
268
  },
269
  {
270
  "epoch": 2.93,
271
- "grad_norm": 0.07521424442529678,
272
  "learning_rate": 8.017241379310344e-05,
273
- "loss": 0.0111,
274
- "step": 340
275
- },
276
- {
277
- "epoch": 3.02,
278
- "grad_norm": 0.05333692207932472,
279
- "learning_rate": 7.370689655172413e-05,
280
- "loss": 0.0104,
281
- "step": 350
282
  },
283
  {
284
  "epoch": 3.1,
285
- "grad_norm": 0.2949971556663513,
286
  "learning_rate": 6.724137931034483e-05,
287
- "loss": 0.0093,
288
- "step": 360
289
- },
290
- {
291
- "epoch": 3.19,
292
- "grad_norm": 0.06963898241519928,
293
- "learning_rate": 6.077586206896551e-05,
294
- "loss": 0.0084,
295
- "step": 370
296
  },
297
  {
298
  "epoch": 3.28,
299
- "grad_norm": 0.08035852760076523,
300
  "learning_rate": 5.4310344827586204e-05,
301
- "loss": 0.0067,
302
- "step": 380
303
- },
304
- {
305
- "epoch": 3.36,
306
- "grad_norm": 0.02499503456056118,
307
- "learning_rate": 4.78448275862069e-05,
308
- "loss": 0.0034,
309
- "step": 390
310
  },
311
  {
312
  "epoch": 3.45,
313
- "grad_norm": 0.4326638877391815,
314
  "learning_rate": 4.137931034482758e-05,
315
- "loss": 0.0087,
316
- "step": 400
317
  },
318
  {
319
  "epoch": 3.45,
320
- "eval_accuracy": 0.9994587280108255,
321
- "eval_loss": 0.0037413176614791155,
322
- "eval_runtime": 66.2753,
323
- "eval_samples_per_second": 111.505,
324
- "eval_steps_per_second": 13.942,
325
- "step": 400
326
- },
327
- {
328
- "epoch": 3.53,
329
- "grad_norm": 0.016918424516916275,
330
- "learning_rate": 3.4913793103448275e-05,
331
- "loss": 0.0035,
332
- "step": 410
333
  },
334
  {
335
  "epoch": 3.62,
336
- "grad_norm": 0.022589026018977165,
337
  "learning_rate": 2.8448275862068963e-05,
338
- "loss": 0.004,
339
- "step": 420
340
- },
341
- {
342
- "epoch": 3.71,
343
- "grad_norm": 0.06562381237745285,
344
- "learning_rate": 2.198275862068965e-05,
345
- "loss": 0.0027,
346
- "step": 430
347
  },
348
  {
349
  "epoch": 3.79,
350
- "grad_norm": 0.0264147762209177,
351
  "learning_rate": 1.5517241379310342e-05,
352
- "loss": 0.0027,
353
- "step": 440
354
- },
355
- {
356
- "epoch": 3.88,
357
- "grad_norm": 0.02782212570309639,
358
- "learning_rate": 9.051724137931034e-06,
359
- "loss": 0.0046,
360
- "step": 450
361
  },
362
  {
363
  "epoch": 3.97,
364
- "grad_norm": 0.02095525525510311,
365
  "learning_rate": 2.5862068965517237e-06,
366
- "loss": 0.0036,
367
- "step": 460
368
  },
369
  {
370
  "epoch": 4.0,
371
- "step": 464,
372
  "total_flos": 2.2913817801515827e+18,
373
- "train_loss": 0.18540822506223903,
374
- "train_runtime": 740.8045,
375
- "train_samples_per_second": 39.903,
376
- "train_steps_per_second": 0.626
377
  }
378
  ],
379
  "logging_steps": 10,
380
- "max_steps": 464,
381
  "num_input_tokens_seen": 0,
382
  "num_train_epochs": 4,
383
  "save_steps": 100,
384
  "total_flos": 2.2913817801515827e+18,
385
- "train_batch_size": 64,
386
  "trial_name": null,
387
  "trial_params": null
388
  }
 
1
  {
2
+ "best_metric": 0.004596503917127848,
3
+ "best_model_checkpoint": "./vit-base-pets/checkpoint-200",
4
  "epoch": 4.0,
5
  "eval_steps": 100,
6
+ "global_step": 232,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
11
  {
12
  "epoch": 0.17,
13
+ "grad_norm": 168186.984375,
14
  "learning_rate": 0.0002870689655172413,
15
+ "loss": 2.1401,
16
+ "step": 10
 
 
 
 
 
 
 
17
  },
18
  {
19
  "epoch": 0.34,
20
+ "grad_norm": 134643.921875,
21
  "learning_rate": 0.0002741379310344827,
22
+ "loss": 0.5937,
23
+ "step": 20
 
 
 
 
 
 
 
24
  },
25
  {
26
  "epoch": 0.52,
27
+ "grad_norm": 117247.1015625,
28
  "learning_rate": 0.00026120689655172413,
29
+ "loss": 0.3283,
30
+ "step": 30
 
 
 
 
 
 
 
31
  },
32
  {
33
  "epoch": 0.69,
34
+ "grad_norm": 148088.53125,
35
  "learning_rate": 0.0002482758620689655,
36
+ "loss": 0.3447,
37
+ "step": 40
 
 
 
 
 
 
 
38
  },
39
  {
40
  "epoch": 0.86,
41
+ "grad_norm": 118186.203125,
42
  "learning_rate": 0.00023534482758620685,
43
+ "loss": 0.2972,
44
+ "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  },
46
  {
47
  "epoch": 1.03,
48
+ "grad_norm": 89050.390625,
49
  "learning_rate": 0.00022241379310344826,
50
+ "loss": 0.2425,
51
+ "step": 60
 
 
 
 
 
 
 
52
  },
53
  {
54
  "epoch": 1.21,
55
+ "grad_norm": 50770.4296875,
56
  "learning_rate": 0.00020948275862068963,
57
+ "loss": 0.1025,
58
+ "step": 70
 
 
 
 
 
 
 
59
  },
60
  {
61
  "epoch": 1.38,
62
+ "grad_norm": 102693.65625,
63
  "learning_rate": 0.000196551724137931,
64
+ "loss": 0.0998,
65
+ "step": 80
 
 
 
 
 
 
 
66
  },
67
  {
68
  "epoch": 1.55,
69
+ "grad_norm": 58018.40234375,
70
  "learning_rate": 0.0001836206896551724,
71
+ "loss": 0.1048,
72
+ "step": 90
 
 
 
 
 
 
 
73
  },
74
  {
75
  "epoch": 1.72,
76
+ "grad_norm": 51126.80078125,
77
  "learning_rate": 0.0001706896551724138,
78
+ "loss": 0.0887,
79
+ "step": 100
80
  },
81
  {
82
  "epoch": 1.72,
83
+ "eval_accuracy": 0.9774018944519621,
84
+ "eval_loss": 0.07652416825294495,
85
+ "eval_runtime": 90.7155,
86
+ "eval_samples_per_second": 81.463,
87
+ "eval_steps_per_second": 5.093,
88
+ "step": 100
 
 
 
 
 
 
 
89
  },
90
  {
91
  "epoch": 1.9,
92
+ "grad_norm": 86962.4921875,
93
  "learning_rate": 0.00015775862068965517,
94
+ "loss": 0.0926,
95
+ "step": 110
 
 
 
 
 
 
 
96
  },
97
  {
98
  "epoch": 2.07,
99
+ "grad_norm": 72358.4609375,
100
  "learning_rate": 0.00014482758620689654,
101
+ "loss": 0.0706,
102
+ "step": 120
 
 
 
 
 
 
 
103
  },
104
  {
105
  "epoch": 2.24,
106
+ "grad_norm": 20616.7109375,
107
  "learning_rate": 0.00013189655172413792,
108
+ "loss": 0.039,
109
+ "step": 130
 
 
 
 
 
 
 
110
  },
111
  {
112
  "epoch": 2.41,
113
+ "grad_norm": 70676.0625,
114
  "learning_rate": 0.00011896551724137931,
115
+ "loss": 0.0351,
116
+ "step": 140
 
 
 
 
 
 
 
117
  },
118
  {
119
  "epoch": 2.59,
120
+ "grad_norm": 12601.1337890625,
121
  "learning_rate": 0.00010603448275862067,
122
+ "loss": 0.0263,
123
+ "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  },
125
  {
126
  "epoch": 2.76,
127
+ "grad_norm": 77678.21875,
128
  "learning_rate": 9.310344827586206e-05,
129
+ "loss": 0.0296,
130
+ "step": 160
 
 
 
 
 
 
 
131
  },
132
  {
133
  "epoch": 2.93,
134
+ "grad_norm": 36381.59765625,
135
  "learning_rate": 8.017241379310344e-05,
136
+ "loss": 0.025,
137
+ "step": 170
 
 
 
 
 
 
 
138
  },
139
  {
140
  "epoch": 3.1,
141
+ "grad_norm": 12375.634765625,
142
  "learning_rate": 6.724137931034483e-05,
143
+ "loss": 0.0087,
144
+ "step": 180
 
 
 
 
 
 
 
145
  },
146
  {
147
  "epoch": 3.28,
148
+ "grad_norm": 1895.7933349609375,
149
  "learning_rate": 5.4310344827586204e-05,
150
+ "loss": 0.0043,
151
+ "step": 190
 
 
 
 
 
 
 
152
  },
153
  {
154
  "epoch": 3.45,
155
+ "grad_norm": 49621.76171875,
156
  "learning_rate": 4.137931034482758e-05,
157
+ "loss": 0.0045,
158
+ "step": 200
159
  },
160
  {
161
  "epoch": 3.45,
162
+ "eval_accuracy": 0.9989174560216508,
163
+ "eval_loss": 0.004596503917127848,
164
+ "eval_runtime": 91.6398,
165
+ "eval_samples_per_second": 80.642,
166
+ "eval_steps_per_second": 5.041,
167
+ "step": 200
 
 
 
 
 
 
 
168
  },
169
  {
170
  "epoch": 3.62,
171
+ "grad_norm": 798.2269287109375,
172
  "learning_rate": 2.8448275862068963e-05,
173
+ "loss": 0.0045,
174
+ "step": 210
 
 
 
 
 
 
 
175
  },
176
  {
177
  "epoch": 3.79,
178
+ "grad_norm": 2413.81640625,
179
  "learning_rate": 1.5517241379310342e-05,
180
+ "loss": 0.0034,
181
+ "step": 220
 
 
 
 
 
 
 
182
  },
183
  {
184
  "epoch": 3.97,
185
+ "grad_norm": 908.8674926757812,
186
  "learning_rate": 2.5862068965517237e-06,
187
+ "loss": 0.0021,
188
+ "step": 230
189
  },
190
  {
191
  "epoch": 4.0,
192
+ "step": 232,
193
  "total_flos": 2.2913817801515827e+18,
194
+ "train_loss": 0.202088082896682,
195
+ "train_runtime": 854.7685,
196
+ "train_samples_per_second": 34.582,
197
+ "train_steps_per_second": 0.271
198
  }
199
  ],
200
  "logging_steps": 10,
201
+ "max_steps": 232,
202
  "num_input_tokens_seen": 0,
203
  "num_train_epochs": 4,
204
  "save_steps": 100,
205
  "total_flos": 2.2913817801515827e+18,
206
+ "train_batch_size": 128,
207
  "trial_name": null,
208
  "trial_params": null
209
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72d067b80968c329a6d7da9dee207c0df97fc6614df48511b3b0bb0ba0e75328
3
  size 4920
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0bf29b9cfaa423385553cd4c1a9ee40cd1712cd86c4bf40b3c7f39094ea7217
3
  size 4920