dima806 commited on
Commit
e738009
·
verified ·
1 Parent(s): 2086917

Upload folder using huggingface_hub

Browse files
checkpoint-28700/config.json ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "google/vit-base-patch16-224-in21k",
3
+ "architectures": [
4
+ "ViTForImageClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.0,
7
+ "encoder_stride": 16,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.0,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "Afghan",
13
+ "1": "African Wild Dog",
14
+ "2": "Airedale",
15
+ "3": "American Hairless",
16
+ "4": "American Spaniel",
17
+ "5": "Basenji",
18
+ "6": "Basset",
19
+ "7": "Beagle",
20
+ "8": "Bearded Collie",
21
+ "9": "Bermaise",
22
+ "10": "Bichon Frise",
23
+ "11": "Blenheim",
24
+ "12": "Bloodhound",
25
+ "13": "Bluetick",
26
+ "14": "Border Collie",
27
+ "15": "Borzoi",
28
+ "16": "Boston Terrier",
29
+ "17": "Boxer",
30
+ "18": "Bull Mastiff",
31
+ "19": "Bull Terrier",
32
+ "20": "Bulldog",
33
+ "21": "Cairn",
34
+ "22": "Chihuahua",
35
+ "23": "Chinese Crested",
36
+ "24": "Chow",
37
+ "25": "Clumber",
38
+ "26": "Cockapoo",
39
+ "27": "Cocker",
40
+ "28": "Collie",
41
+ "29": "Corgi",
42
+ "30": "Coyote",
43
+ "31": "Dalmation",
44
+ "32": "Dhole",
45
+ "33": "Dingo",
46
+ "34": "Doberman",
47
+ "35": "Elk Hound",
48
+ "36": "French Bulldog",
49
+ "37": "German Sheperd",
50
+ "38": "Golden Retriever",
51
+ "39": "Great Dane",
52
+ "40": "Great Perenees",
53
+ "41": "Greyhound",
54
+ "42": "Groenendael",
55
+ "43": "Irish Spaniel",
56
+ "44": "Irish Wolfhound",
57
+ "45": "Japanese Spaniel",
58
+ "46": "Komondor",
59
+ "47": "Labradoodle",
60
+ "48": "Labrador",
61
+ "49": "Lhasa",
62
+ "50": "Malinois",
63
+ "51": "Maltese",
64
+ "52": "Mex Hairless",
65
+ "53": "Newfoundland",
66
+ "54": "Pekinese",
67
+ "55": "Pit Bull",
68
+ "56": "Pomeranian",
69
+ "57": "Poodle",
70
+ "58": "Pug",
71
+ "59": "Rhodesian",
72
+ "60": "Rottweiler",
73
+ "61": "Saint Bernard",
74
+ "62": "Schnauzer",
75
+ "63": "Scotch Terrier",
76
+ "64": "Shar_Pei",
77
+ "65": "Shiba Inu",
78
+ "66": "Shih-Tzu",
79
+ "67": "Siberian Husky",
80
+ "68": "Vizsla",
81
+ "69": "Yorkie"
82
+ },
83
+ "image_size": 224,
84
+ "initializer_range": 0.02,
85
+ "intermediate_size": 3072,
86
+ "label2id": {
87
+ "Afghan": 0,
88
+ "African Wild Dog": 1,
89
+ "Airedale": 2,
90
+ "American Hairless": 3,
91
+ "American Spaniel": 4,
92
+ "Basenji": 5,
93
+ "Basset": 6,
94
+ "Beagle": 7,
95
+ "Bearded Collie": 8,
96
+ "Bermaise": 9,
97
+ "Bichon Frise": 10,
98
+ "Blenheim": 11,
99
+ "Bloodhound": 12,
100
+ "Bluetick": 13,
101
+ "Border Collie": 14,
102
+ "Borzoi": 15,
103
+ "Boston Terrier": 16,
104
+ "Boxer": 17,
105
+ "Bull Mastiff": 18,
106
+ "Bull Terrier": 19,
107
+ "Bulldog": 20,
108
+ "Cairn": 21,
109
+ "Chihuahua": 22,
110
+ "Chinese Crested": 23,
111
+ "Chow": 24,
112
+ "Clumber": 25,
113
+ "Cockapoo": 26,
114
+ "Cocker": 27,
115
+ "Collie": 28,
116
+ "Corgi": 29,
117
+ "Coyote": 30,
118
+ "Dalmation": 31,
119
+ "Dhole": 32,
120
+ "Dingo": 33,
121
+ "Doberman": 34,
122
+ "Elk Hound": 35,
123
+ "French Bulldog": 36,
124
+ "German Sheperd": 37,
125
+ "Golden Retriever": 38,
126
+ "Great Dane": 39,
127
+ "Great Perenees": 40,
128
+ "Greyhound": 41,
129
+ "Groenendael": 42,
130
+ "Irish Spaniel": 43,
131
+ "Irish Wolfhound": 44,
132
+ "Japanese Spaniel": 45,
133
+ "Komondor": 46,
134
+ "Labradoodle": 47,
135
+ "Labrador": 48,
136
+ "Lhasa": 49,
137
+ "Malinois": 50,
138
+ "Maltese": 51,
139
+ "Mex Hairless": 52,
140
+ "Newfoundland": 53,
141
+ "Pekinese": 54,
142
+ "Pit Bull": 55,
143
+ "Pomeranian": 56,
144
+ "Poodle": 57,
145
+ "Pug": 58,
146
+ "Rhodesian": 59,
147
+ "Rottweiler": 60,
148
+ "Saint Bernard": 61,
149
+ "Schnauzer": 62,
150
+ "Scotch Terrier": 63,
151
+ "Shar_Pei": 64,
152
+ "Shiba Inu": 65,
153
+ "Shih-Tzu": 66,
154
+ "Siberian Husky": 67,
155
+ "Vizsla": 68,
156
+ "Yorkie": 69
157
+ },
158
+ "layer_norm_eps": 1e-12,
159
+ "model_type": "vit",
160
+ "num_attention_heads": 12,
161
+ "num_channels": 3,
162
+ "num_hidden_layers": 12,
163
+ "patch_size": 16,
164
+ "problem_type": "single_label_classification",
165
+ "qkv_bias": true,
166
+ "torch_dtype": "float32",
167
+ "transformers_version": "4.45.2"
168
+ }
checkpoint-28700/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa6f5b1f9c44c3c29d9c4c7c230929790148180797e2a7c74658d6a0569b7889
3
+ size 343433152
checkpoint-28700/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9441119e17fbce8c02c123f0a6bc137190b493c4a02ea94d0eb91c98ce0d197f
3
+ size 686986757
checkpoint-28700/preprocessor_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "do_rescale": true,
4
+ "do_resize": true,
5
+ "image_mean": [
6
+ 0.5,
7
+ 0.5,
8
+ 0.5
9
+ ],
10
+ "image_processor_type": "ViTImageProcessor",
11
+ "image_std": [
12
+ 0.5,
13
+ 0.5,
14
+ 0.5
15
+ ],
16
+ "resample": 2,
17
+ "rescale_factor": 0.00392156862745098,
18
+ "size": {
19
+ "height": 224,
20
+ "width": 224
21
+ }
22
+ }
checkpoint-28700/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4e46c392585b57f3d919f401bc84426e9917e75fa9450d0b2300bd39adcd700
3
+ size 14575
checkpoint-28700/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5f5db57e2e71196f015b9687e3a2120516fab7ddf9b02d267209859f3b74fef
3
+ size 627
checkpoint-28700/trainer_state.json ADDED
@@ -0,0 +1,1432 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.204108953475952,
3
+ "best_model_checkpoint": "dogs_70_breeds_image_detection/checkpoint-28700",
4
+ "epoch": 100.0,
5
+ "eval_steps": 500,
6
+ "global_step": 28700,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_accuracy": 0.02490170380078637,
14
+ "eval_loss": 4.22898006439209,
15
+ "eval_model_preparation_time": 0.0056,
16
+ "eval_runtime": 53.0352,
17
+ "eval_samples_per_second": 115.093,
18
+ "eval_steps_per_second": 14.387,
19
+ "step": 287
20
+ },
21
+ {
22
+ "epoch": 1.7421602787456445,
23
+ "grad_norm": 1.2073975801467896,
24
+ "learning_rate": 9.842931937172775e-07,
25
+ "loss": 4.2284,
26
+ "step": 500
27
+ },
28
+ {
29
+ "epoch": 2.0,
30
+ "eval_accuracy": 0.048820445609436436,
31
+ "eval_loss": 4.192336559295654,
32
+ "eval_model_preparation_time": 0.0056,
33
+ "eval_runtime": 51.9294,
34
+ "eval_samples_per_second": 117.544,
35
+ "eval_steps_per_second": 14.693,
36
+ "step": 574
37
+ },
38
+ {
39
+ "epoch": 3.0,
40
+ "eval_accuracy": 0.09518348623853211,
41
+ "eval_loss": 4.150681972503662,
42
+ "eval_model_preparation_time": 0.0056,
43
+ "eval_runtime": 51.3038,
44
+ "eval_samples_per_second": 118.977,
45
+ "eval_steps_per_second": 14.872,
46
+ "step": 861
47
+ },
48
+ {
49
+ "epoch": 3.484320557491289,
50
+ "grad_norm": 1.3359559774398804,
51
+ "learning_rate": 9.668411867364747e-07,
52
+ "loss": 4.1524,
53
+ "step": 1000
54
+ },
55
+ {
56
+ "epoch": 4.0,
57
+ "eval_accuracy": 0.18299475753604194,
58
+ "eval_loss": 4.1059417724609375,
59
+ "eval_model_preparation_time": 0.0056,
60
+ "eval_runtime": 51.1368,
61
+ "eval_samples_per_second": 119.366,
62
+ "eval_steps_per_second": 14.921,
63
+ "step": 1148
64
+ },
65
+ {
66
+ "epoch": 5.0,
67
+ "eval_accuracy": 0.29701834862385323,
68
+ "eval_loss": 4.060353755950928,
69
+ "eval_model_preparation_time": 0.0056,
70
+ "eval_runtime": 52.6533,
71
+ "eval_samples_per_second": 115.928,
72
+ "eval_steps_per_second": 14.491,
73
+ "step": 1435
74
+ },
75
+ {
76
+ "epoch": 5.2264808362369335,
77
+ "grad_norm": 1.33736252784729,
78
+ "learning_rate": 9.493891797556719e-07,
79
+ "loss": 4.07,
80
+ "step": 1500
81
+ },
82
+ {
83
+ "epoch": 6.0,
84
+ "eval_accuracy": 0.42480340760157276,
85
+ "eval_loss": 4.015019416809082,
86
+ "eval_model_preparation_time": 0.0056,
87
+ "eval_runtime": 52.6896,
88
+ "eval_samples_per_second": 115.848,
89
+ "eval_steps_per_second": 14.481,
90
+ "step": 1722
91
+ },
92
+ {
93
+ "epoch": 6.968641114982578,
94
+ "grad_norm": 1.47151780128479,
95
+ "learning_rate": 9.31937172774869e-07,
96
+ "loss": 3.9838,
97
+ "step": 2000
98
+ },
99
+ {
100
+ "epoch": 7.0,
101
+ "eval_accuracy": 0.5329292267365662,
102
+ "eval_loss": 3.9675402641296387,
103
+ "eval_model_preparation_time": 0.0056,
104
+ "eval_runtime": 53.5702,
105
+ "eval_samples_per_second": 113.944,
106
+ "eval_steps_per_second": 14.243,
107
+ "step": 2009
108
+ },
109
+ {
110
+ "epoch": 8.0,
111
+ "eval_accuracy": 0.6192660550458715,
112
+ "eval_loss": 3.920508623123169,
113
+ "eval_model_preparation_time": 0.0056,
114
+ "eval_runtime": 54.4646,
115
+ "eval_samples_per_second": 112.073,
116
+ "eval_steps_per_second": 14.009,
117
+ "step": 2296
118
+ },
119
+ {
120
+ "epoch": 8.710801393728223,
121
+ "grad_norm": 1.5192607641220093,
122
+ "learning_rate": 9.144851657940664e-07,
123
+ "loss": 3.8977,
124
+ "step": 2500
125
+ },
126
+ {
127
+ "epoch": 9.0,
128
+ "eval_accuracy": 0.6908584534731324,
129
+ "eval_loss": 3.8727903366088867,
130
+ "eval_model_preparation_time": 0.0056,
131
+ "eval_runtime": 53.3384,
132
+ "eval_samples_per_second": 114.439,
133
+ "eval_steps_per_second": 14.305,
134
+ "step": 2583
135
+ },
136
+ {
137
+ "epoch": 10.0,
138
+ "eval_accuracy": 0.7378768020969856,
139
+ "eval_loss": 3.8257367610931396,
140
+ "eval_model_preparation_time": 0.0056,
141
+ "eval_runtime": 52.9624,
142
+ "eval_samples_per_second": 115.252,
143
+ "eval_steps_per_second": 14.406,
144
+ "step": 2870
145
+ },
146
+ {
147
+ "epoch": 10.452961672473867,
148
+ "grad_norm": 1.5286623239517212,
149
+ "learning_rate": 8.970331588132635e-07,
150
+ "loss": 3.8106,
151
+ "step": 3000
152
+ },
153
+ {
154
+ "epoch": 11.0,
155
+ "eval_accuracy": 0.7714613368283093,
156
+ "eval_loss": 3.7787516117095947,
157
+ "eval_model_preparation_time": 0.0056,
158
+ "eval_runtime": 52.9786,
159
+ "eval_samples_per_second": 115.216,
160
+ "eval_steps_per_second": 14.402,
161
+ "step": 3157
162
+ },
163
+ {
164
+ "epoch": 12.0,
165
+ "eval_accuracy": 0.7980013106159896,
166
+ "eval_loss": 3.7332425117492676,
167
+ "eval_model_preparation_time": 0.0056,
168
+ "eval_runtime": 53.2337,
169
+ "eval_samples_per_second": 114.664,
170
+ "eval_steps_per_second": 14.333,
171
+ "step": 3444
172
+ },
173
+ {
174
+ "epoch": 12.195121951219512,
175
+ "grad_norm": 2.0538368225097656,
176
+ "learning_rate": 8.795811518324607e-07,
177
+ "loss": 3.7266,
178
+ "step": 3500
179
+ },
180
+ {
181
+ "epoch": 13.0,
182
+ "eval_accuracy": 0.8112712975098296,
183
+ "eval_loss": 3.68833065032959,
184
+ "eval_model_preparation_time": 0.0056,
185
+ "eval_runtime": 53.3885,
186
+ "eval_samples_per_second": 114.332,
187
+ "eval_steps_per_second": 14.291,
188
+ "step": 3731
189
+ },
190
+ {
191
+ "epoch": 13.937282229965156,
192
+ "grad_norm": 1.6309235095977783,
193
+ "learning_rate": 8.621291448516579e-07,
194
+ "loss": 3.6456,
195
+ "step": 4000
196
+ },
197
+ {
198
+ "epoch": 14.0,
199
+ "eval_accuracy": 0.8211009174311926,
200
+ "eval_loss": 3.642503023147583,
201
+ "eval_model_preparation_time": 0.0056,
202
+ "eval_runtime": 53.7562,
203
+ "eval_samples_per_second": 113.55,
204
+ "eval_steps_per_second": 14.194,
205
+ "step": 4018
206
+ },
207
+ {
208
+ "epoch": 15.0,
209
+ "eval_accuracy": 0.8299475753604194,
210
+ "eval_loss": 3.5993118286132812,
211
+ "eval_model_preparation_time": 0.0056,
212
+ "eval_runtime": 54.1732,
213
+ "eval_samples_per_second": 112.676,
214
+ "eval_steps_per_second": 14.084,
215
+ "step": 4305
216
+ },
217
+ {
218
+ "epoch": 15.679442508710801,
219
+ "grad_norm": 1.6514416933059692,
220
+ "learning_rate": 8.446771378708551e-07,
221
+ "loss": 3.5667,
222
+ "step": 4500
223
+ },
224
+ {
225
+ "epoch": 16.0,
226
+ "eval_accuracy": 0.8381389252948886,
227
+ "eval_loss": 3.5562214851379395,
228
+ "eval_model_preparation_time": 0.0056,
229
+ "eval_runtime": 53.3732,
230
+ "eval_samples_per_second": 114.365,
231
+ "eval_steps_per_second": 14.296,
232
+ "step": 4592
233
+ },
234
+ {
235
+ "epoch": 17.0,
236
+ "eval_accuracy": 0.8423984272608126,
237
+ "eval_loss": 3.514224052429199,
238
+ "eval_model_preparation_time": 0.0056,
239
+ "eval_runtime": 53.3166,
240
+ "eval_samples_per_second": 114.486,
241
+ "eval_steps_per_second": 14.311,
242
+ "step": 4879
243
+ },
244
+ {
245
+ "epoch": 17.421602787456447,
246
+ "grad_norm": 1.6095918416976929,
247
+ "learning_rate": 8.272251308900523e-07,
248
+ "loss": 3.4909,
249
+ "step": 5000
250
+ },
251
+ {
252
+ "epoch": 18.0,
253
+ "eval_accuracy": 0.851572739187418,
254
+ "eval_loss": 3.4752180576324463,
255
+ "eval_model_preparation_time": 0.0056,
256
+ "eval_runtime": 52.6463,
257
+ "eval_samples_per_second": 115.944,
258
+ "eval_steps_per_second": 14.493,
259
+ "step": 5166
260
+ },
261
+ {
262
+ "epoch": 19.0,
263
+ "eval_accuracy": 0.85370249017038,
264
+ "eval_loss": 3.435464859008789,
265
+ "eval_model_preparation_time": 0.0056,
266
+ "eval_runtime": 52.9099,
267
+ "eval_samples_per_second": 115.366,
268
+ "eval_steps_per_second": 14.421,
269
+ "step": 5453
270
+ },
271
+ {
272
+ "epoch": 19.16376306620209,
273
+ "grad_norm": 1.7376962900161743,
274
+ "learning_rate": 8.097731239092496e-07,
275
+ "loss": 3.419,
276
+ "step": 5500
277
+ },
278
+ {
279
+ "epoch": 20.0,
280
+ "eval_accuracy": 0.8582896461336829,
281
+ "eval_loss": 3.3971521854400635,
282
+ "eval_model_preparation_time": 0.0056,
283
+ "eval_runtime": 52.7361,
284
+ "eval_samples_per_second": 115.746,
285
+ "eval_steps_per_second": 14.468,
286
+ "step": 5740
287
+ },
288
+ {
289
+ "epoch": 20.905923344947734,
290
+ "grad_norm": 1.8516736030578613,
291
+ "learning_rate": 7.923211169284468e-07,
292
+ "loss": 3.349,
293
+ "step": 6000
294
+ },
295
+ {
296
+ "epoch": 21.0,
297
+ "eval_accuracy": 0.862221494102228,
298
+ "eval_loss": 3.35935640335083,
299
+ "eval_model_preparation_time": 0.0056,
300
+ "eval_runtime": 53.0085,
301
+ "eval_samples_per_second": 115.151,
302
+ "eval_steps_per_second": 14.394,
303
+ "step": 6027
304
+ },
305
+ {
306
+ "epoch": 22.0,
307
+ "eval_accuracy": 0.8640235910878112,
308
+ "eval_loss": 3.323974370956421,
309
+ "eval_model_preparation_time": 0.0056,
310
+ "eval_runtime": 53.6052,
311
+ "eval_samples_per_second": 113.87,
312
+ "eval_steps_per_second": 14.234,
313
+ "step": 6314
314
+ },
315
+ {
316
+ "epoch": 22.64808362369338,
317
+ "grad_norm": 1.8237162828445435,
318
+ "learning_rate": 7.74869109947644e-07,
319
+ "loss": 3.2859,
320
+ "step": 6500
321
+ },
322
+ {
323
+ "epoch": 23.0,
324
+ "eval_accuracy": 0.8663171690694627,
325
+ "eval_loss": 3.288198232650757,
326
+ "eval_model_preparation_time": 0.0056,
327
+ "eval_runtime": 52.8638,
328
+ "eval_samples_per_second": 115.467,
329
+ "eval_steps_per_second": 14.433,
330
+ "step": 6601
331
+ },
332
+ {
333
+ "epoch": 24.0,
334
+ "eval_accuracy": 0.8699213630406291,
335
+ "eval_loss": 3.2544491291046143,
336
+ "eval_model_preparation_time": 0.0056,
337
+ "eval_runtime": 52.3248,
338
+ "eval_samples_per_second": 116.656,
339
+ "eval_steps_per_second": 14.582,
340
+ "step": 6888
341
+ },
342
+ {
343
+ "epoch": 24.390243902439025,
344
+ "grad_norm": 1.9635404348373413,
345
+ "learning_rate": 7.574171029668411e-07,
346
+ "loss": 3.2226,
347
+ "step": 7000
348
+ },
349
+ {
350
+ "epoch": 25.0,
351
+ "eval_accuracy": 0.8697575360419397,
352
+ "eval_loss": 3.2205216884613037,
353
+ "eval_model_preparation_time": 0.0056,
354
+ "eval_runtime": 52.8748,
355
+ "eval_samples_per_second": 115.443,
356
+ "eval_steps_per_second": 14.43,
357
+ "step": 7175
358
+ },
359
+ {
360
+ "epoch": 26.0,
361
+ "eval_accuracy": 0.8733617300131061,
362
+ "eval_loss": 3.1889891624450684,
363
+ "eval_model_preparation_time": 0.0056,
364
+ "eval_runtime": 54.213,
365
+ "eval_samples_per_second": 112.593,
366
+ "eval_steps_per_second": 14.074,
367
+ "step": 7462
368
+ },
369
+ {
370
+ "epoch": 26.13240418118467,
371
+ "grad_norm": 1.7023582458496094,
372
+ "learning_rate": 7.399650959860384e-07,
373
+ "loss": 3.1622,
374
+ "step": 7500
375
+ },
376
+ {
377
+ "epoch": 27.0,
378
+ "eval_accuracy": 0.8756553079947575,
379
+ "eval_loss": 3.157259941101074,
380
+ "eval_model_preparation_time": 0.0056,
381
+ "eval_runtime": 52.9974,
382
+ "eval_samples_per_second": 115.176,
383
+ "eval_steps_per_second": 14.397,
384
+ "step": 7749
385
+ },
386
+ {
387
+ "epoch": 27.874564459930312,
388
+ "grad_norm": 1.7533528804779053,
389
+ "learning_rate": 7.225130890052355e-07,
390
+ "loss": 3.1082,
391
+ "step": 8000
392
+ },
393
+ {
394
+ "epoch": 28.0,
395
+ "eval_accuracy": 0.8787680209698558,
396
+ "eval_loss": 3.126617908477783,
397
+ "eval_model_preparation_time": 0.0056,
398
+ "eval_runtime": 52.8562,
399
+ "eval_samples_per_second": 115.483,
400
+ "eval_steps_per_second": 14.435,
401
+ "step": 8036
402
+ },
403
+ {
404
+ "epoch": 29.0,
405
+ "eval_accuracy": 0.8807339449541285,
406
+ "eval_loss": 3.0970399379730225,
407
+ "eval_model_preparation_time": 0.0056,
408
+ "eval_runtime": 52.9688,
409
+ "eval_samples_per_second": 115.238,
410
+ "eval_steps_per_second": 14.405,
411
+ "step": 8323
412
+ },
413
+ {
414
+ "epoch": 29.61672473867596,
415
+ "grad_norm": 1.8451896905899048,
416
+ "learning_rate": 7.050610820244328e-07,
417
+ "loss": 3.0527,
418
+ "step": 8500
419
+ },
420
+ {
421
+ "epoch": 30.0,
422
+ "eval_accuracy": 0.882699868938401,
423
+ "eval_loss": 3.068537950515747,
424
+ "eval_model_preparation_time": 0.0056,
425
+ "eval_runtime": 52.723,
426
+ "eval_samples_per_second": 115.775,
427
+ "eval_steps_per_second": 14.472,
428
+ "step": 8610
429
+ },
430
+ {
431
+ "epoch": 31.0,
432
+ "eval_accuracy": 0.8822083879423329,
433
+ "eval_loss": 3.0398900508880615,
434
+ "eval_model_preparation_time": 0.0056,
435
+ "eval_runtime": 52.8402,
436
+ "eval_samples_per_second": 115.518,
437
+ "eval_steps_per_second": 14.44,
438
+ "step": 8897
439
+ },
440
+ {
441
+ "epoch": 31.358885017421603,
442
+ "grad_norm": 1.9000401496887207,
443
+ "learning_rate": 6.8760907504363e-07,
444
+ "loss": 3.0013,
445
+ "step": 9000
446
+ },
447
+ {
448
+ "epoch": 32.0,
449
+ "eval_accuracy": 0.8843381389252949,
450
+ "eval_loss": 3.0114569664001465,
451
+ "eval_model_preparation_time": 0.0056,
452
+ "eval_runtime": 52.8176,
453
+ "eval_samples_per_second": 115.568,
454
+ "eval_steps_per_second": 14.446,
455
+ "step": 9184
456
+ },
457
+ {
458
+ "epoch": 33.0,
459
+ "eval_accuracy": 0.8863040629095675,
460
+ "eval_loss": 2.9846866130828857,
461
+ "eval_model_preparation_time": 0.0056,
462
+ "eval_runtime": 52.9145,
463
+ "eval_samples_per_second": 115.356,
464
+ "eval_steps_per_second": 14.419,
465
+ "step": 9471
466
+ },
467
+ {
468
+ "epoch": 33.10104529616725,
469
+ "grad_norm": 1.8424733877182007,
470
+ "learning_rate": 6.701570680628273e-07,
471
+ "loss": 2.9544,
472
+ "step": 9500
473
+ },
474
+ {
475
+ "epoch": 34.0,
476
+ "eval_accuracy": 0.8881061598951507,
477
+ "eval_loss": 2.9583957195281982,
478
+ "eval_model_preparation_time": 0.0056,
479
+ "eval_runtime": 53.5577,
480
+ "eval_samples_per_second": 113.97,
481
+ "eval_steps_per_second": 14.246,
482
+ "step": 9758
483
+ },
484
+ {
485
+ "epoch": 34.84320557491289,
486
+ "grad_norm": 1.8269978761672974,
487
+ "learning_rate": 6.527050610820244e-07,
488
+ "loss": 2.9042,
489
+ "step": 10000
490
+ },
491
+ {
492
+ "epoch": 35.0,
493
+ "eval_accuracy": 0.8890891218872871,
494
+ "eval_loss": 2.93341326713562,
495
+ "eval_model_preparation_time": 0.0056,
496
+ "eval_runtime": 53.4697,
497
+ "eval_samples_per_second": 114.158,
498
+ "eval_steps_per_second": 14.27,
499
+ "step": 10045
500
+ },
501
+ {
502
+ "epoch": 36.0,
503
+ "eval_accuracy": 0.890399737876802,
504
+ "eval_loss": 2.908048152923584,
505
+ "eval_model_preparation_time": 0.0056,
506
+ "eval_runtime": 53.4437,
507
+ "eval_samples_per_second": 114.214,
508
+ "eval_steps_per_second": 14.277,
509
+ "step": 10332
510
+ },
511
+ {
512
+ "epoch": 36.58536585365854,
513
+ "grad_norm": 2.0479116439819336,
514
+ "learning_rate": 6.352530541012216e-07,
515
+ "loss": 2.8575,
516
+ "step": 10500
517
+ },
518
+ {
519
+ "epoch": 37.0,
520
+ "eval_accuracy": 0.8907273918741808,
521
+ "eval_loss": 2.8831241130828857,
522
+ "eval_model_preparation_time": 0.0056,
523
+ "eval_runtime": 52.6355,
524
+ "eval_samples_per_second": 115.967,
525
+ "eval_steps_per_second": 14.496,
526
+ "step": 10619
527
+ },
528
+ {
529
+ "epoch": 38.0,
530
+ "eval_accuracy": 0.8918741808650066,
531
+ "eval_loss": 2.860414981842041,
532
+ "eval_model_preparation_time": 0.0056,
533
+ "eval_runtime": 54.5384,
534
+ "eval_samples_per_second": 111.921,
535
+ "eval_steps_per_second": 13.99,
536
+ "step": 10906
537
+ },
538
+ {
539
+ "epoch": 38.32752613240418,
540
+ "grad_norm": 2.2141685485839844,
541
+ "learning_rate": 6.178010471204188e-07,
542
+ "loss": 2.8171,
543
+ "step": 11000
544
+ },
545
+ {
546
+ "epoch": 39.0,
547
+ "eval_accuracy": 0.8926933158584535,
548
+ "eval_loss": 2.8362042903900146,
549
+ "eval_model_preparation_time": 0.0056,
550
+ "eval_runtime": 52.4046,
551
+ "eval_samples_per_second": 116.478,
552
+ "eval_steps_per_second": 14.56,
553
+ "step": 11193
554
+ },
555
+ {
556
+ "epoch": 40.0,
557
+ "eval_accuracy": 0.8940039318479686,
558
+ "eval_loss": 2.813732147216797,
559
+ "eval_model_preparation_time": 0.0056,
560
+ "eval_runtime": 53.0566,
561
+ "eval_samples_per_second": 115.047,
562
+ "eval_steps_per_second": 14.381,
563
+ "step": 11480
564
+ },
565
+ {
566
+ "epoch": 40.069686411149824,
567
+ "grad_norm": 1.753510594367981,
568
+ "learning_rate": 6.00349040139616e-07,
569
+ "loss": 2.7729,
570
+ "step": 11500
571
+ },
572
+ {
573
+ "epoch": 41.0,
574
+ "eval_accuracy": 0.8948230668414154,
575
+ "eval_loss": 2.7919933795928955,
576
+ "eval_model_preparation_time": 0.0056,
577
+ "eval_runtime": 53.4649,
578
+ "eval_samples_per_second": 114.168,
579
+ "eval_steps_per_second": 14.271,
580
+ "step": 11767
581
+ },
582
+ {
583
+ "epoch": 41.81184668989547,
584
+ "grad_norm": 2.048759937286377,
585
+ "learning_rate": 5.828970331588133e-07,
586
+ "loss": 2.7358,
587
+ "step": 12000
588
+ },
589
+ {
590
+ "epoch": 42.0,
591
+ "eval_accuracy": 0.8956422018348624,
592
+ "eval_loss": 2.7705893516540527,
593
+ "eval_model_preparation_time": 0.0056,
594
+ "eval_runtime": 52.8824,
595
+ "eval_samples_per_second": 115.426,
596
+ "eval_steps_per_second": 14.428,
597
+ "step": 12054
598
+ },
599
+ {
600
+ "epoch": 43.0,
601
+ "eval_accuracy": 0.89629750982962,
602
+ "eval_loss": 2.7493233680725098,
603
+ "eval_model_preparation_time": 0.0056,
604
+ "eval_runtime": 53.4446,
605
+ "eval_samples_per_second": 114.212,
606
+ "eval_steps_per_second": 14.276,
607
+ "step": 12341
608
+ },
609
+ {
610
+ "epoch": 43.55400696864111,
611
+ "grad_norm": 1.7660369873046875,
612
+ "learning_rate": 5.654450261780105e-07,
613
+ "loss": 2.6966,
614
+ "step": 12500
615
+ },
616
+ {
617
+ "epoch": 44.0,
618
+ "eval_accuracy": 0.8985910878112713,
619
+ "eval_loss": 2.7291805744171143,
620
+ "eval_model_preparation_time": 0.0056,
621
+ "eval_runtime": 52.7207,
622
+ "eval_samples_per_second": 115.78,
623
+ "eval_steps_per_second": 14.472,
624
+ "step": 12628
625
+ },
626
+ {
627
+ "epoch": 45.0,
628
+ "eval_accuracy": 0.8989187418086501,
629
+ "eval_loss": 2.708031415939331,
630
+ "eval_model_preparation_time": 0.0056,
631
+ "eval_runtime": 52.6994,
632
+ "eval_samples_per_second": 115.827,
633
+ "eval_steps_per_second": 14.478,
634
+ "step": 12915
635
+ },
636
+ {
637
+ "epoch": 45.29616724738676,
638
+ "grad_norm": 1.8633924722671509,
639
+ "learning_rate": 5.479930191972077e-07,
640
+ "loss": 2.6579,
641
+ "step": 13000
642
+ },
643
+ {
644
+ "epoch": 46.0,
645
+ "eval_accuracy": 0.8989187418086501,
646
+ "eval_loss": 2.6885175704956055,
647
+ "eval_model_preparation_time": 0.0056,
648
+ "eval_runtime": 52.82,
649
+ "eval_samples_per_second": 115.562,
650
+ "eval_steps_per_second": 14.445,
651
+ "step": 13202
652
+ },
653
+ {
654
+ "epoch": 47.0,
655
+ "eval_accuracy": 0.9008846657929227,
656
+ "eval_loss": 2.669055461883545,
657
+ "eval_model_preparation_time": 0.0056,
658
+ "eval_runtime": 52.9106,
659
+ "eval_samples_per_second": 115.364,
660
+ "eval_steps_per_second": 14.421,
661
+ "step": 13489
662
+ },
663
+ {
664
+ "epoch": 47.038327526132406,
665
+ "grad_norm": 2.1170654296875,
666
+ "learning_rate": 5.305410122164049e-07,
667
+ "loss": 2.6229,
668
+ "step": 13500
669
+ },
670
+ {
671
+ "epoch": 48.0,
672
+ "eval_accuracy": 0.901048492791612,
673
+ "eval_loss": 2.6509358882904053,
674
+ "eval_model_preparation_time": 0.0056,
675
+ "eval_runtime": 53.208,
676
+ "eval_samples_per_second": 114.72,
677
+ "eval_steps_per_second": 14.34,
678
+ "step": 13776
679
+ },
680
+ {
681
+ "epoch": 48.78048780487805,
682
+ "grad_norm": 2.170200824737549,
683
+ "learning_rate": 5.13089005235602e-07,
684
+ "loss": 2.5897,
685
+ "step": 14000
686
+ },
687
+ {
688
+ "epoch": 49.0,
689
+ "eval_accuracy": 0.9023591087811271,
690
+ "eval_loss": 2.6330766677856445,
691
+ "eval_model_preparation_time": 0.0056,
692
+ "eval_runtime": 53.5861,
693
+ "eval_samples_per_second": 113.91,
694
+ "eval_steps_per_second": 14.239,
695
+ "step": 14063
696
+ },
697
+ {
698
+ "epoch": 50.0,
699
+ "eval_accuracy": 0.9026867627785059,
700
+ "eval_loss": 2.615280866622925,
701
+ "eval_model_preparation_time": 0.0056,
702
+ "eval_runtime": 53.1713,
703
+ "eval_samples_per_second": 114.799,
704
+ "eval_steps_per_second": 14.35,
705
+ "step": 14350
706
+ },
707
+ {
708
+ "epoch": 50.52264808362369,
709
+ "grad_norm": 2.208359956741333,
710
+ "learning_rate": 4.956369982547993e-07,
711
+ "loss": 2.5583,
712
+ "step": 14500
713
+ },
714
+ {
715
+ "epoch": 51.0,
716
+ "eval_accuracy": 0.9025229357798165,
717
+ "eval_loss": 2.5973732471466064,
718
+ "eval_model_preparation_time": 0.0056,
719
+ "eval_runtime": 52.7169,
720
+ "eval_samples_per_second": 115.788,
721
+ "eval_steps_per_second": 14.474,
722
+ "step": 14637
723
+ },
724
+ {
725
+ "epoch": 52.0,
726
+ "eval_accuracy": 0.9043250327653998,
727
+ "eval_loss": 2.581191062927246,
728
+ "eval_model_preparation_time": 0.0056,
729
+ "eval_runtime": 54.1621,
730
+ "eval_samples_per_second": 112.699,
731
+ "eval_steps_per_second": 14.087,
732
+ "step": 14924
733
+ },
734
+ {
735
+ "epoch": 52.26480836236934,
736
+ "grad_norm": 2.159825325012207,
737
+ "learning_rate": 4.781849912739964e-07,
738
+ "loss": 2.5257,
739
+ "step": 15000
740
+ },
741
+ {
742
+ "epoch": 53.0,
743
+ "eval_accuracy": 0.9035058977719528,
744
+ "eval_loss": 2.564519166946411,
745
+ "eval_model_preparation_time": 0.0056,
746
+ "eval_runtime": 54.0888,
747
+ "eval_samples_per_second": 112.851,
748
+ "eval_steps_per_second": 14.106,
749
+ "step": 15211
750
+ },
751
+ {
752
+ "epoch": 54.0,
753
+ "eval_accuracy": 0.9041612057667103,
754
+ "eval_loss": 2.5490972995758057,
755
+ "eval_model_preparation_time": 0.0056,
756
+ "eval_runtime": 54.8624,
757
+ "eval_samples_per_second": 111.26,
758
+ "eval_steps_per_second": 13.908,
759
+ "step": 15498
760
+ },
761
+ {
762
+ "epoch": 54.00696864111498,
763
+ "grad_norm": 1.8598278760910034,
764
+ "learning_rate": 4.607329842931937e-07,
765
+ "loss": 2.4986,
766
+ "step": 15500
767
+ },
768
+ {
769
+ "epoch": 55.0,
770
+ "eval_accuracy": 0.9059633027522935,
771
+ "eval_loss": 2.533395528793335,
772
+ "eval_model_preparation_time": 0.0056,
773
+ "eval_runtime": 52.7918,
774
+ "eval_samples_per_second": 115.624,
775
+ "eval_steps_per_second": 14.453,
776
+ "step": 15785
777
+ },
778
+ {
779
+ "epoch": 55.749128919860624,
780
+ "grad_norm": 1.9237112998962402,
781
+ "learning_rate": 4.4328097731239087e-07,
782
+ "loss": 2.4703,
783
+ "step": 16000
784
+ },
785
+ {
786
+ "epoch": 56.0,
787
+ "eval_accuracy": 0.9056356487549148,
788
+ "eval_loss": 2.5186452865600586,
789
+ "eval_model_preparation_time": 0.0056,
790
+ "eval_runtime": 52.6994,
791
+ "eval_samples_per_second": 115.827,
792
+ "eval_steps_per_second": 14.478,
793
+ "step": 16072
794
+ },
795
+ {
796
+ "epoch": 57.0,
797
+ "eval_accuracy": 0.906127129750983,
798
+ "eval_loss": 2.504196882247925,
799
+ "eval_model_preparation_time": 0.0056,
800
+ "eval_runtime": 53.0824,
801
+ "eval_samples_per_second": 114.991,
802
+ "eval_steps_per_second": 14.374,
803
+ "step": 16359
804
+ },
805
+ {
806
+ "epoch": 57.491289198606275,
807
+ "grad_norm": 2.3951334953308105,
808
+ "learning_rate": 4.258289703315881e-07,
809
+ "loss": 2.444,
810
+ "step": 16500
811
+ },
812
+ {
813
+ "epoch": 58.0,
814
+ "eval_accuracy": 0.9092398427260813,
815
+ "eval_loss": 2.48964262008667,
816
+ "eval_model_preparation_time": 0.0056,
817
+ "eval_runtime": 52.6693,
818
+ "eval_samples_per_second": 115.893,
819
+ "eval_steps_per_second": 14.487,
820
+ "step": 16646
821
+ },
822
+ {
823
+ "epoch": 59.0,
824
+ "eval_accuracy": 0.9092398427260813,
825
+ "eval_loss": 2.4755215644836426,
826
+ "eval_model_preparation_time": 0.0056,
827
+ "eval_runtime": 53.0222,
828
+ "eval_samples_per_second": 115.121,
829
+ "eval_steps_per_second": 14.39,
830
+ "step": 16933
831
+ },
832
+ {
833
+ "epoch": 59.23344947735192,
834
+ "grad_norm": 2.315953493118286,
835
+ "learning_rate": 4.083769633507853e-07,
836
+ "loss": 2.4157,
837
+ "step": 17000
838
+ },
839
+ {
840
+ "epoch": 60.0,
841
+ "eval_accuracy": 0.908748361730013,
842
+ "eval_loss": 2.4624128341674805,
843
+ "eval_model_preparation_time": 0.0056,
844
+ "eval_runtime": 53.0917,
845
+ "eval_samples_per_second": 114.971,
846
+ "eval_steps_per_second": 14.371,
847
+ "step": 17220
848
+ },
849
+ {
850
+ "epoch": 60.97560975609756,
851
+ "grad_norm": 2.262585401535034,
852
+ "learning_rate": 3.909249563699825e-07,
853
+ "loss": 2.3904,
854
+ "step": 17500
855
+ },
856
+ {
857
+ "epoch": 61.0,
858
+ "eval_accuracy": 0.90956749672346,
859
+ "eval_loss": 2.448362112045288,
860
+ "eval_model_preparation_time": 0.0056,
861
+ "eval_runtime": 53.2314,
862
+ "eval_samples_per_second": 114.669,
863
+ "eval_steps_per_second": 14.334,
864
+ "step": 17507
865
+ },
866
+ {
867
+ "epoch": 62.0,
868
+ "eval_accuracy": 0.9102228047182176,
869
+ "eval_loss": 2.435769557952881,
870
+ "eval_model_preparation_time": 0.0056,
871
+ "eval_runtime": 55.5115,
872
+ "eval_samples_per_second": 109.959,
873
+ "eval_steps_per_second": 13.745,
874
+ "step": 17794
875
+ },
876
+ {
877
+ "epoch": 62.717770034843205,
878
+ "grad_norm": 1.9645085334777832,
879
+ "learning_rate": 3.7347294938917974e-07,
880
+ "loss": 2.367,
881
+ "step": 18000
882
+ },
883
+ {
884
+ "epoch": 63.0,
885
+ "eval_accuracy": 0.9110419397116645,
886
+ "eval_loss": 2.422717809677124,
887
+ "eval_model_preparation_time": 0.0056,
888
+ "eval_runtime": 55.4553,
889
+ "eval_samples_per_second": 110.071,
890
+ "eval_steps_per_second": 13.759,
891
+ "step": 18081
892
+ },
893
+ {
894
+ "epoch": 64.0,
895
+ "eval_accuracy": 0.9118610747051114,
896
+ "eval_loss": 2.4112889766693115,
897
+ "eval_model_preparation_time": 0.0056,
898
+ "eval_runtime": 54.3035,
899
+ "eval_samples_per_second": 112.405,
900
+ "eval_steps_per_second": 14.051,
901
+ "step": 18368
902
+ },
903
+ {
904
+ "epoch": 64.45993031358886,
905
+ "grad_norm": 2.300833225250244,
906
+ "learning_rate": 3.5602094240837693e-07,
907
+ "loss": 2.3506,
908
+ "step": 18500
909
+ },
910
+ {
911
+ "epoch": 65.0,
912
+ "eval_accuracy": 0.9120249017038008,
913
+ "eval_loss": 2.399442434310913,
914
+ "eval_model_preparation_time": 0.0056,
915
+ "eval_runtime": 54.4675,
916
+ "eval_samples_per_second": 112.067,
917
+ "eval_steps_per_second": 14.008,
918
+ "step": 18655
919
+ },
920
+ {
921
+ "epoch": 66.0,
922
+ "eval_accuracy": 0.9125163826998689,
923
+ "eval_loss": 2.3884286880493164,
924
+ "eval_model_preparation_time": 0.0056,
925
+ "eval_runtime": 54.201,
926
+ "eval_samples_per_second": 112.618,
927
+ "eval_steps_per_second": 14.077,
928
+ "step": 18942
929
+ },
930
+ {
931
+ "epoch": 66.2020905923345,
932
+ "grad_norm": 2.1156551837921143,
933
+ "learning_rate": 3.3856893542757413e-07,
934
+ "loss": 2.3237,
935
+ "step": 19000
936
+ },
937
+ {
938
+ "epoch": 67.0,
939
+ "eval_accuracy": 0.9130078636959371,
940
+ "eval_loss": 2.3775436878204346,
941
+ "eval_model_preparation_time": 0.0056,
942
+ "eval_runtime": 54.1909,
943
+ "eval_samples_per_second": 112.639,
944
+ "eval_steps_per_second": 14.08,
945
+ "step": 19229
946
+ },
947
+ {
948
+ "epoch": 67.94425087108014,
949
+ "grad_norm": 1.9504848718643188,
950
+ "learning_rate": 3.2111692844677137e-07,
951
+ "loss": 2.3053,
952
+ "step": 19500
953
+ },
954
+ {
955
+ "epoch": 68.0,
956
+ "eval_accuracy": 0.9133355176933159,
957
+ "eval_loss": 2.3665952682495117,
958
+ "eval_model_preparation_time": 0.0056,
959
+ "eval_runtime": 54.7045,
960
+ "eval_samples_per_second": 111.581,
961
+ "eval_steps_per_second": 13.948,
962
+ "step": 19516
963
+ },
964
+ {
965
+ "epoch": 69.0,
966
+ "eval_accuracy": 0.9131716906946264,
967
+ "eval_loss": 2.356461763381958,
968
+ "eval_model_preparation_time": 0.0056,
969
+ "eval_runtime": 53.8382,
970
+ "eval_samples_per_second": 113.377,
971
+ "eval_steps_per_second": 14.172,
972
+ "step": 19803
973
+ },
974
+ {
975
+ "epoch": 69.68641114982579,
976
+ "grad_norm": 1.9092929363250732,
977
+ "learning_rate": 3.0366492146596856e-07,
978
+ "loss": 2.285,
979
+ "step": 20000
980
+ },
981
+ {
982
+ "epoch": 70.0,
983
+ "eval_accuracy": 0.9139908256880734,
984
+ "eval_loss": 2.34682559967041,
985
+ "eval_model_preparation_time": 0.0056,
986
+ "eval_runtime": 53.9142,
987
+ "eval_samples_per_second": 113.217,
988
+ "eval_steps_per_second": 14.152,
989
+ "step": 20090
990
+ },
991
+ {
992
+ "epoch": 71.0,
993
+ "eval_accuracy": 0.9141546526867628,
994
+ "eval_loss": 2.3373236656188965,
995
+ "eval_model_preparation_time": 0.0056,
996
+ "eval_runtime": 56.1241,
997
+ "eval_samples_per_second": 108.759,
998
+ "eval_steps_per_second": 13.595,
999
+ "step": 20377
1000
+ },
1001
+ {
1002
+ "epoch": 71.42857142857143,
1003
+ "grad_norm": 2.050865411758423,
1004
+ "learning_rate": 2.8621291448516576e-07,
1005
+ "loss": 2.2706,
1006
+ "step": 20500
1007
+ },
1008
+ {
1009
+ "epoch": 72.0,
1010
+ "eval_accuracy": 0.9141546526867628,
1011
+ "eval_loss": 2.3278188705444336,
1012
+ "eval_model_preparation_time": 0.0056,
1013
+ "eval_runtime": 52.6008,
1014
+ "eval_samples_per_second": 116.044,
1015
+ "eval_steps_per_second": 14.505,
1016
+ "step": 20664
1017
+ },
1018
+ {
1019
+ "epoch": 73.0,
1020
+ "eval_accuracy": 0.9148099606815203,
1021
+ "eval_loss": 2.3193538188934326,
1022
+ "eval_model_preparation_time": 0.0056,
1023
+ "eval_runtime": 53.1272,
1024
+ "eval_samples_per_second": 114.894,
1025
+ "eval_steps_per_second": 14.362,
1026
+ "step": 20951
1027
+ },
1028
+ {
1029
+ "epoch": 73.17073170731707,
1030
+ "grad_norm": 2.2429611682891846,
1031
+ "learning_rate": 2.68760907504363e-07,
1032
+ "loss": 2.2531,
1033
+ "step": 21000
1034
+ },
1035
+ {
1036
+ "epoch": 74.0,
1037
+ "eval_accuracy": 0.9161205766710354,
1038
+ "eval_loss": 2.3109400272369385,
1039
+ "eval_model_preparation_time": 0.0056,
1040
+ "eval_runtime": 54.1757,
1041
+ "eval_samples_per_second": 112.671,
1042
+ "eval_steps_per_second": 14.084,
1043
+ "step": 21238
1044
+ },
1045
+ {
1046
+ "epoch": 74.91289198606272,
1047
+ "grad_norm": 2.5070650577545166,
1048
+ "learning_rate": 2.513089005235602e-07,
1049
+ "loss": 2.2372,
1050
+ "step": 21500
1051
+ },
1052
+ {
1053
+ "epoch": 75.0,
1054
+ "eval_accuracy": 0.9157929226736566,
1055
+ "eval_loss": 2.3027830123901367,
1056
+ "eval_model_preparation_time": 0.0056,
1057
+ "eval_runtime": 53.9429,
1058
+ "eval_samples_per_second": 113.157,
1059
+ "eval_steps_per_second": 14.145,
1060
+ "step": 21525
1061
+ },
1062
+ {
1063
+ "epoch": 76.0,
1064
+ "eval_accuracy": 0.915956749672346,
1065
+ "eval_loss": 2.2948713302612305,
1066
+ "eval_model_preparation_time": 0.0056,
1067
+ "eval_runtime": 53.7032,
1068
+ "eval_samples_per_second": 113.662,
1069
+ "eval_steps_per_second": 14.208,
1070
+ "step": 21812
1071
+ },
1072
+ {
1073
+ "epoch": 76.65505226480836,
1074
+ "grad_norm": 1.9022382497787476,
1075
+ "learning_rate": 2.338568935427574e-07,
1076
+ "loss": 2.223,
1077
+ "step": 22000
1078
+ },
1079
+ {
1080
+ "epoch": 77.0,
1081
+ "eval_accuracy": 0.9151376146788991,
1082
+ "eval_loss": 2.2872207164764404,
1083
+ "eval_model_preparation_time": 0.0056,
1084
+ "eval_runtime": 52.4931,
1085
+ "eval_samples_per_second": 116.282,
1086
+ "eval_steps_per_second": 14.535,
1087
+ "step": 22099
1088
+ },
1089
+ {
1090
+ "epoch": 78.0,
1091
+ "eval_accuracy": 0.9161205766710354,
1092
+ "eval_loss": 2.2804179191589355,
1093
+ "eval_model_preparation_time": 0.0056,
1094
+ "eval_runtime": 52.9333,
1095
+ "eval_samples_per_second": 115.315,
1096
+ "eval_steps_per_second": 14.414,
1097
+ "step": 22386
1098
+ },
1099
+ {
1100
+ "epoch": 78.397212543554,
1101
+ "grad_norm": 2.776266574859619,
1102
+ "learning_rate": 2.164048865619546e-07,
1103
+ "loss": 2.2134,
1104
+ "step": 22500
1105
+ },
1106
+ {
1107
+ "epoch": 79.0,
1108
+ "eval_accuracy": 0.9164482306684142,
1109
+ "eval_loss": 2.2735037803649902,
1110
+ "eval_model_preparation_time": 0.0056,
1111
+ "eval_runtime": 51.885,
1112
+ "eval_samples_per_second": 117.645,
1113
+ "eval_steps_per_second": 14.706,
1114
+ "step": 22673
1115
+ },
1116
+ {
1117
+ "epoch": 80.0,
1118
+ "eval_accuracy": 0.9166120576671035,
1119
+ "eval_loss": 2.266995668411255,
1120
+ "eval_model_preparation_time": 0.0056,
1121
+ "eval_runtime": 52.3579,
1122
+ "eval_samples_per_second": 116.582,
1123
+ "eval_steps_per_second": 14.573,
1124
+ "step": 22960
1125
+ },
1126
+ {
1127
+ "epoch": 80.13937282229965,
1128
+ "grad_norm": 2.38444447517395,
1129
+ "learning_rate": 1.989528795811518e-07,
1130
+ "loss": 2.195,
1131
+ "step": 23000
1132
+ },
1133
+ {
1134
+ "epoch": 81.0,
1135
+ "eval_accuracy": 0.9166120576671035,
1136
+ "eval_loss": 2.2608718872070312,
1137
+ "eval_model_preparation_time": 0.0056,
1138
+ "eval_runtime": 52.4181,
1139
+ "eval_samples_per_second": 116.448,
1140
+ "eval_steps_per_second": 14.556,
1141
+ "step": 23247
1142
+ },
1143
+ {
1144
+ "epoch": 81.88153310104529,
1145
+ "grad_norm": 2.126260757446289,
1146
+ "learning_rate": 1.8150087260034902e-07,
1147
+ "loss": 2.1856,
1148
+ "step": 23500
1149
+ },
1150
+ {
1151
+ "epoch": 82.0,
1152
+ "eval_accuracy": 0.9166120576671035,
1153
+ "eval_loss": 2.2549705505371094,
1154
+ "eval_model_preparation_time": 0.0056,
1155
+ "eval_runtime": 52.0305,
1156
+ "eval_samples_per_second": 117.316,
1157
+ "eval_steps_per_second": 14.664,
1158
+ "step": 23534
1159
+ },
1160
+ {
1161
+ "epoch": 83.0,
1162
+ "eval_accuracy": 0.9175950196592398,
1163
+ "eval_loss": 2.2493834495544434,
1164
+ "eval_model_preparation_time": 0.0056,
1165
+ "eval_runtime": 51.6617,
1166
+ "eval_samples_per_second": 118.153,
1167
+ "eval_steps_per_second": 14.769,
1168
+ "step": 23821
1169
+ },
1170
+ {
1171
+ "epoch": 83.62369337979094,
1172
+ "grad_norm": 2.363184928894043,
1173
+ "learning_rate": 1.6404886561954624e-07,
1174
+ "loss": 2.1759,
1175
+ "step": 24000
1176
+ },
1177
+ {
1178
+ "epoch": 84.0,
1179
+ "eval_accuracy": 0.9174311926605505,
1180
+ "eval_loss": 2.244133949279785,
1181
+ "eval_model_preparation_time": 0.0056,
1182
+ "eval_runtime": 51.6512,
1183
+ "eval_samples_per_second": 118.177,
1184
+ "eval_steps_per_second": 14.772,
1185
+ "step": 24108
1186
+ },
1187
+ {
1188
+ "epoch": 85.0,
1189
+ "eval_accuracy": 0.918086500655308,
1190
+ "eval_loss": 2.2390334606170654,
1191
+ "eval_model_preparation_time": 0.0056,
1192
+ "eval_runtime": 50.8842,
1193
+ "eval_samples_per_second": 119.959,
1194
+ "eval_steps_per_second": 14.995,
1195
+ "step": 24395
1196
+ },
1197
+ {
1198
+ "epoch": 85.36585365853658,
1199
+ "grad_norm": 2.263583183288574,
1200
+ "learning_rate": 1.4659685863874343e-07,
1201
+ "loss": 2.1669,
1202
+ "step": 24500
1203
+ },
1204
+ {
1205
+ "epoch": 86.0,
1206
+ "eval_accuracy": 0.9184141546526867,
1207
+ "eval_loss": 2.2345142364501953,
1208
+ "eval_model_preparation_time": 0.0056,
1209
+ "eval_runtime": 52.6434,
1210
+ "eval_samples_per_second": 115.95,
1211
+ "eval_steps_per_second": 14.494,
1212
+ "step": 24682
1213
+ },
1214
+ {
1215
+ "epoch": 87.0,
1216
+ "eval_accuracy": 0.9182503276539974,
1217
+ "eval_loss": 2.230360984802246,
1218
+ "eval_model_preparation_time": 0.0056,
1219
+ "eval_runtime": 53.4573,
1220
+ "eval_samples_per_second": 114.185,
1221
+ "eval_steps_per_second": 14.273,
1222
+ "step": 24969
1223
+ },
1224
+ {
1225
+ "epoch": 87.10801393728222,
1226
+ "grad_norm": 2.30412220954895,
1227
+ "learning_rate": 1.2914485165794065e-07,
1228
+ "loss": 2.1574,
1229
+ "step": 25000
1230
+ },
1231
+ {
1232
+ "epoch": 88.0,
1233
+ "eval_accuracy": 0.9185779816513762,
1234
+ "eval_loss": 2.226450204849243,
1235
+ "eval_model_preparation_time": 0.0056,
1236
+ "eval_runtime": 53.9596,
1237
+ "eval_samples_per_second": 113.122,
1238
+ "eval_steps_per_second": 14.14,
1239
+ "step": 25256
1240
+ },
1241
+ {
1242
+ "epoch": 88.85017421602788,
1243
+ "grad_norm": 2.065523147583008,
1244
+ "learning_rate": 1.1169284467713787e-07,
1245
+ "loss": 2.152,
1246
+ "step": 25500
1247
+ },
1248
+ {
1249
+ "epoch": 89.0,
1250
+ "eval_accuracy": 0.9189056356487549,
1251
+ "eval_loss": 2.2227816581726074,
1252
+ "eval_model_preparation_time": 0.0056,
1253
+ "eval_runtime": 54.0621,
1254
+ "eval_samples_per_second": 112.907,
1255
+ "eval_steps_per_second": 14.113,
1256
+ "step": 25543
1257
+ },
1258
+ {
1259
+ "epoch": 90.0,
1260
+ "eval_accuracy": 0.9185779816513762,
1261
+ "eval_loss": 2.2196154594421387,
1262
+ "eval_model_preparation_time": 0.0056,
1263
+ "eval_runtime": 54.1756,
1264
+ "eval_samples_per_second": 112.671,
1265
+ "eval_steps_per_second": 14.084,
1266
+ "step": 25830
1267
+ },
1268
+ {
1269
+ "epoch": 90.59233449477352,
1270
+ "grad_norm": 2.453361749649048,
1271
+ "learning_rate": 9.424083769633507e-08,
1272
+ "loss": 2.1462,
1273
+ "step": 26000
1274
+ },
1275
+ {
1276
+ "epoch": 91.0,
1277
+ "eval_accuracy": 0.9182503276539974,
1278
+ "eval_loss": 2.216709613800049,
1279
+ "eval_model_preparation_time": 0.0056,
1280
+ "eval_runtime": 53.7343,
1281
+ "eval_samples_per_second": 113.596,
1282
+ "eval_steps_per_second": 14.199,
1283
+ "step": 26117
1284
+ },
1285
+ {
1286
+ "epoch": 92.0,
1287
+ "eval_accuracy": 0.9185779816513762,
1288
+ "eval_loss": 2.214052438735962,
1289
+ "eval_model_preparation_time": 0.0056,
1290
+ "eval_runtime": 53.4012,
1291
+ "eval_samples_per_second": 114.305,
1292
+ "eval_steps_per_second": 14.288,
1293
+ "step": 26404
1294
+ },
1295
+ {
1296
+ "epoch": 92.33449477351917,
1297
+ "grad_norm": 2.680849313735962,
1298
+ "learning_rate": 7.678883071553228e-08,
1299
+ "loss": 2.1384,
1300
+ "step": 26500
1301
+ },
1302
+ {
1303
+ "epoch": 93.0,
1304
+ "eval_accuracy": 0.9185779816513762,
1305
+ "eval_loss": 2.2115318775177,
1306
+ "eval_model_preparation_time": 0.0056,
1307
+ "eval_runtime": 54.7905,
1308
+ "eval_samples_per_second": 111.406,
1309
+ "eval_steps_per_second": 13.926,
1310
+ "step": 26691
1311
+ },
1312
+ {
1313
+ "epoch": 94.0,
1314
+ "eval_accuracy": 0.9184141546526867,
1315
+ "eval_loss": 2.209531784057617,
1316
+ "eval_model_preparation_time": 0.0056,
1317
+ "eval_runtime": 53.296,
1318
+ "eval_samples_per_second": 114.53,
1319
+ "eval_steps_per_second": 14.316,
1320
+ "step": 26978
1321
+ },
1322
+ {
1323
+ "epoch": 94.07665505226481,
1324
+ "grad_norm": 1.967248558998108,
1325
+ "learning_rate": 5.933682373472949e-08,
1326
+ "loss": 2.1353,
1327
+ "step": 27000
1328
+ },
1329
+ {
1330
+ "epoch": 95.0,
1331
+ "eval_accuracy": 0.9190694626474443,
1332
+ "eval_loss": 2.207920551300049,
1333
+ "eval_model_preparation_time": 0.0056,
1334
+ "eval_runtime": 53.7765,
1335
+ "eval_samples_per_second": 113.507,
1336
+ "eval_steps_per_second": 14.188,
1337
+ "step": 27265
1338
+ },
1339
+ {
1340
+ "epoch": 95.81881533101046,
1341
+ "grad_norm": 2.0648317337036133,
1342
+ "learning_rate": 4.1884816753926704e-08,
1343
+ "loss": 2.1324,
1344
+ "step": 27500
1345
+ },
1346
+ {
1347
+ "epoch": 96.0,
1348
+ "eval_accuracy": 0.9190694626474443,
1349
+ "eval_loss": 2.2065632343292236,
1350
+ "eval_model_preparation_time": 0.0056,
1351
+ "eval_runtime": 54.6903,
1352
+ "eval_samples_per_second": 111.61,
1353
+ "eval_steps_per_second": 13.951,
1354
+ "step": 27552
1355
+ },
1356
+ {
1357
+ "epoch": 97.0,
1358
+ "eval_accuracy": 0.9190694626474443,
1359
+ "eval_loss": 2.2054483890533447,
1360
+ "eval_model_preparation_time": 0.0056,
1361
+ "eval_runtime": 54.5908,
1362
+ "eval_samples_per_second": 111.814,
1363
+ "eval_steps_per_second": 13.977,
1364
+ "step": 27839
1365
+ },
1366
+ {
1367
+ "epoch": 97.5609756097561,
1368
+ "grad_norm": 2.1506221294403076,
1369
+ "learning_rate": 2.4432809773123906e-08,
1370
+ "loss": 2.1307,
1371
+ "step": 28000
1372
+ },
1373
+ {
1374
+ "epoch": 98.0,
1375
+ "eval_accuracy": 0.9190694626474443,
1376
+ "eval_loss": 2.2046942710876465,
1377
+ "eval_model_preparation_time": 0.0056,
1378
+ "eval_runtime": 54.8026,
1379
+ "eval_samples_per_second": 111.382,
1380
+ "eval_steps_per_second": 13.923,
1381
+ "step": 28126
1382
+ },
1383
+ {
1384
+ "epoch": 99.0,
1385
+ "eval_accuracy": 0.9192332896461337,
1386
+ "eval_loss": 2.2042276859283447,
1387
+ "eval_model_preparation_time": 0.0056,
1388
+ "eval_runtime": 54.9177,
1389
+ "eval_samples_per_second": 111.148,
1390
+ "eval_steps_per_second": 13.894,
1391
+ "step": 28413
1392
+ },
1393
+ {
1394
+ "epoch": 99.30313588850174,
1395
+ "grad_norm": 1.9334636926651,
1396
+ "learning_rate": 6.980802792321117e-09,
1397
+ "loss": 2.1278,
1398
+ "step": 28500
1399
+ },
1400
+ {
1401
+ "epoch": 100.0,
1402
+ "eval_accuracy": 0.9192332896461337,
1403
+ "eval_loss": 2.204108953475952,
1404
+ "eval_model_preparation_time": 0.0056,
1405
+ "eval_runtime": 54.2227,
1406
+ "eval_samples_per_second": 112.573,
1407
+ "eval_steps_per_second": 14.072,
1408
+ "step": 28700
1409
+ }
1410
+ ],
1411
+ "logging_steps": 500,
1412
+ "max_steps": 28700,
1413
+ "num_input_tokens_seen": 0,
1414
+ "num_train_epochs": 100,
1415
+ "save_steps": 500,
1416
+ "stateful_callbacks": {
1417
+ "TrainerControl": {
1418
+ "args": {
1419
+ "should_epoch_stop": false,
1420
+ "should_evaluate": false,
1421
+ "should_log": false,
1422
+ "should_save": true,
1423
+ "should_training_stop": true
1424
+ },
1425
+ "attributes": {}
1426
+ }
1427
+ },
1428
+ "total_flos": 7.099490807006331e+19,
1429
+ "train_batch_size": 32,
1430
+ "trial_name": null,
1431
+ "trial_params": null
1432
+ }
checkpoint-28700/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:375fecfe3250f0d7d4d281c005412dc987d92419d67a86bffe4969cb248991cd
3
+ size 4731
config.json CHANGED
@@ -12,74 +12,73 @@
12
  "0": "Afghan",
13
  "1": "African Wild Dog",
14
  "2": "Airedale",
15
- "3": "American Spaniel",
16
- "4": "American Hairless",
17
- "5": "American Spaniel",
18
- "6": "Basenji",
19
- "7": "Basset",
20
- "8": "Beagle",
21
- "9": "Bearded Collie",
22
- "10": "Bermaise",
23
- "11": "Bichon Frise",
24
- "12": "Blenheim",
25
- "13": "Bloodhound",
26
- "14": "Bluetick",
27
- "15": "Border Collie",
28
- "16": "Borzoi",
29
- "17": "Boston Terrier",
30
- "18": "Boxer",
31
- "19": "Bull Mastiff",
32
- "20": "Bull Terrier",
33
- "21": "Bulldog",
34
- "22": "Cairn",
35
- "23": "Chihuahua",
36
- "24": "Chinese Crested",
37
- "25": "Chow",
38
- "26": "Clumber",
39
- "27": "Cockapoo",
40
- "28": "Cocker",
41
- "29": "Collie",
42
- "30": "Corgi",
43
- "31": "Coyote",
44
- "32": "Dalmation",
45
- "33": "Dhole",
46
- "34": "Dingo",
47
- "35": "Doberman",
48
- "36": "Elk Hound",
49
- "37": "French Bulldog",
50
- "38": "German Sheperd",
51
- "39": "Golden Retriever",
52
- "40": "Great Dane",
53
- "41": "Great Perenees",
54
- "42": "Greyhound",
55
- "43": "Groenendael",
56
- "44": "Irish Spaniel",
57
- "45": "Irish Wolfhound",
58
- "46": "Japanese Spaniel",
59
- "47": "Komondor",
60
- "48": "Labradoodle",
61
- "49": "Labrador",
62
- "50": "Lhasa",
63
- "51": "Malinois",
64
- "52": "Maltese",
65
- "53": "Mex Hairless",
66
- "54": "Newfoundland",
67
- "55": "Pekinese",
68
- "56": "Pit Bull",
69
- "57": "Pomeranian",
70
- "58": "Poodle",
71
- "59": "Pug",
72
- "60": "Rhodesian",
73
- "61": "Rottweiler",
74
- "62": "Saint Bernard",
75
- "63": "Schnauzer",
76
- "64": "Scotch Terrier",
77
- "65": "Shar_Pei",
78
- "66": "Shiba Inu",
79
- "67": "Shih-Tzu",
80
- "68": "Siberian Husky",
81
- "69": "Vizsla",
82
- "70": "Yorkie"
83
  },
84
  "image_size": 224,
85
  "initializer_range": 0.02,
@@ -88,74 +87,73 @@
88
  "Afghan": 0,
89
  "African Wild Dog": 1,
90
  "Airedale": 2,
91
- "American Spaniel": 3,
92
- "American Hairless": 4,
93
- "American Spaniel": 5,
94
- "Basenji": 6,
95
- "Basset": 7,
96
- "Beagle": 8,
97
- "Bearded Collie": 9,
98
- "Bermaise": 10,
99
- "Bichon Frise": 11,
100
- "Blenheim": 12,
101
- "Bloodhound": 13,
102
- "Bluetick": 14,
103
- "Border Collie": 15,
104
- "Borzoi": 16,
105
- "Boston Terrier": 17,
106
- "Boxer": 18,
107
- "Bull Mastiff": 19,
108
- "Bull Terrier": 20,
109
- "Bulldog": 21,
110
- "Cairn": 22,
111
- "Chihuahua": 23,
112
- "Chinese Crested": 24,
113
- "Chow": 25,
114
- "Clumber": 26,
115
- "Cockapoo": 27,
116
- "Cocker": 28,
117
- "Collie": 29,
118
- "Corgi": 30,
119
- "Coyote": 31,
120
- "Dalmation": 32,
121
- "Dhole": 33,
122
- "Dingo": 34,
123
- "Doberman": 35,
124
- "Elk Hound": 36,
125
- "French Bulldog": 37,
126
- "German Sheperd": 38,
127
- "Golden Retriever": 39,
128
- "Great Dane": 40,
129
- "Great Perenees": 41,
130
- "Greyhound": 42,
131
- "Groenendael": 43,
132
- "Irish Spaniel": 44,
133
- "Irish Wolfhound": 45,
134
- "Japanese Spaniel": 46,
135
- "Komondor": 47,
136
- "Labradoodle": 48,
137
- "Labrador": 49,
138
- "Lhasa": 50,
139
- "Malinois": 51,
140
- "Maltese": 52,
141
- "Mex Hairless": 53,
142
- "Newfoundland": 54,
143
- "Pekinese": 55,
144
- "Pit Bull": 56,
145
- "Pomeranian": 57,
146
- "Poodle": 58,
147
- "Pug": 59,
148
- "Rhodesian": 60,
149
- "Rottweiler": 61,
150
- "Saint Bernard": 62,
151
- "Schnauzer": 63,
152
- "Scotch Terrier": 64,
153
- "Shar_Pei": 65,
154
- "Shiba Inu": 66,
155
- "Shih-Tzu": 67,
156
- "Siberian Husky": 68,
157
- "Vizsla": 69,
158
- "Yorkie": 70
159
  },
160
  "layer_norm_eps": 1e-12,
161
  "model_type": "vit",
 
12
  "0": "Afghan",
13
  "1": "African Wild Dog",
14
  "2": "Airedale",
15
+ "3": "American Hairless",
16
+ "4": "American Spaniel",
17
+ "5": "Basenji",
18
+ "6": "Basset",
19
+ "7": "Beagle",
20
+ "8": "Bearded Collie",
21
+ "9": "Bermaise",
22
+ "10": "Bichon Frise",
23
+ "11": "Blenheim",
24
+ "12": "Bloodhound",
25
+ "13": "Bluetick",
26
+ "14": "Border Collie",
27
+ "15": "Borzoi",
28
+ "16": "Boston Terrier",
29
+ "17": "Boxer",
30
+ "18": "Bull Mastiff",
31
+ "19": "Bull Terrier",
32
+ "20": "Bulldog",
33
+ "21": "Cairn",
34
+ "22": "Chihuahua",
35
+ "23": "Chinese Crested",
36
+ "24": "Chow",
37
+ "25": "Clumber",
38
+ "26": "Cockapoo",
39
+ "27": "Cocker",
40
+ "28": "Collie",
41
+ "29": "Corgi",
42
+ "30": "Coyote",
43
+ "31": "Dalmation",
44
+ "32": "Dhole",
45
+ "33": "Dingo",
46
+ "34": "Doberman",
47
+ "35": "Elk Hound",
48
+ "36": "French Bulldog",
49
+ "37": "German Sheperd",
50
+ "38": "Golden Retriever",
51
+ "39": "Great Dane",
52
+ "40": "Great Perenees",
53
+ "41": "Greyhound",
54
+ "42": "Groenendael",
55
+ "43": "Irish Spaniel",
56
+ "44": "Irish Wolfhound",
57
+ "45": "Japanese Spaniel",
58
+ "46": "Komondor",
59
+ "47": "Labradoodle",
60
+ "48": "Labrador",
61
+ "49": "Lhasa",
62
+ "50": "Malinois",
63
+ "51": "Maltese",
64
+ "52": "Mex Hairless",
65
+ "53": "Newfoundland",
66
+ "54": "Pekinese",
67
+ "55": "Pit Bull",
68
+ "56": "Pomeranian",
69
+ "57": "Poodle",
70
+ "58": "Pug",
71
+ "59": "Rhodesian",
72
+ "60": "Rottweiler",
73
+ "61": "Saint Bernard",
74
+ "62": "Schnauzer",
75
+ "63": "Scotch Terrier",
76
+ "64": "Shar_Pei",
77
+ "65": "Shiba Inu",
78
+ "66": "Shih-Tzu",
79
+ "67": "Siberian Husky",
80
+ "68": "Vizsla",
81
+ "69": "Yorkie"
 
82
  },
83
  "image_size": 224,
84
  "initializer_range": 0.02,
 
87
  "Afghan": 0,
88
  "African Wild Dog": 1,
89
  "Airedale": 2,
90
+ "American Hairless": 3,
91
+ "American Spaniel": 4,
92
+ "Basenji": 5,
93
+ "Basset": 6,
94
+ "Beagle": 7,
95
+ "Bearded Collie": 8,
96
+ "Bermaise": 9,
97
+ "Bichon Frise": 10,
98
+ "Blenheim": 11,
99
+ "Bloodhound": 12,
100
+ "Bluetick": 13,
101
+ "Border Collie": 14,
102
+ "Borzoi": 15,
103
+ "Boston Terrier": 16,
104
+ "Boxer": 17,
105
+ "Bull Mastiff": 18,
106
+ "Bull Terrier": 19,
107
+ "Bulldog": 20,
108
+ "Cairn": 21,
109
+ "Chihuahua": 22,
110
+ "Chinese Crested": 23,
111
+ "Chow": 24,
112
+ "Clumber": 25,
113
+ "Cockapoo": 26,
114
+ "Cocker": 27,
115
+ "Collie": 28,
116
+ "Corgi": 29,
117
+ "Coyote": 30,
118
+ "Dalmation": 31,
119
+ "Dhole": 32,
120
+ "Dingo": 33,
121
+ "Doberman": 34,
122
+ "Elk Hound": 35,
123
+ "French Bulldog": 36,
124
+ "German Sheperd": 37,
125
+ "Golden Retriever": 38,
126
+ "Great Dane": 39,
127
+ "Great Perenees": 40,
128
+ "Greyhound": 41,
129
+ "Groenendael": 42,
130
+ "Irish Spaniel": 43,
131
+ "Irish Wolfhound": 44,
132
+ "Japanese Spaniel": 45,
133
+ "Komondor": 46,
134
+ "Labradoodle": 47,
135
+ "Labrador": 48,
136
+ "Lhasa": 49,
137
+ "Malinois": 50,
138
+ "Maltese": 51,
139
+ "Mex Hairless": 52,
140
+ "Newfoundland": 53,
141
+ "Pekinese": 54,
142
+ "Pit Bull": 55,
143
+ "Pomeranian": 56,
144
+ "Poodle": 57,
145
+ "Pug": 58,
146
+ "Rhodesian": 59,
147
+ "Rottweiler": 60,
148
+ "Saint Bernard": 61,
149
+ "Schnauzer": 62,
150
+ "Scotch Terrier": 63,
151
+ "Shar_Pei": 64,
152
+ "Shiba Inu": 65,
153
+ "Shih-Tzu": 66,
154
+ "Siberian Husky": 67,
155
+ "Vizsla": 68,
156
+ "Yorkie": 69
 
157
  },
158
  "layer_norm_eps": 1e-12,
159
  "model_type": "vit",
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bda49ac12533271a8bbe1efb216edbf2fe4d89614936f48267431eb3a0ba3b2d
3
- size 343436228
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa6f5b1f9c44c3c29d9c4c7c230929790148180797e2a7c74658d6a0569b7889
3
+ size 343433152