Asadali12 committed on
Commit
c905cc3
·
verified ·
1 Parent(s): dcfdfd6

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.2439999999999998,
3
+ "eval_accuracy": 0.7586206896551724,
4
+ "eval_loss": 0.8059644103050232,
5
+ "eval_runtime": 62.6184,
6
+ "eval_samples_per_second": 0.926,
7
+ "eval_steps_per_second": 0.064
8
+ }
runs/Sep28_19-47-20_602ba6f27d79/events.out.tfevents.1727558261.602ba6f27d79.31.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5e89e7fedd9e9aaf85b7378def7f9fe79575d18729702b358e5848bd7891bf1f
3
- size 411
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfcb22a57eb377196ca671d27371b35d547c57b65e7e1bad37d24bd0487079a7
3
+ size 734
test_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.2439999999999998,
3
+ "eval_accuracy": 0.7586206896551724,
4
+ "eval_loss": 0.8059644103050232,
5
+ "eval_runtime": 62.6184,
6
+ "eval_samples_per_second": 0.926,
7
+ "eval_steps_per_second": 0.064
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.6818181818181818,
3
+ "best_model_checkpoint": "videomae-base-finetuned-Custom_Dataset_Finetune/checkpoint-126",
4
+ "epoch": 3.2439999999999998,
5
+ "eval_steps": 500,
6
+ "global_step": 250,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04,
13
+ "grad_norm": 4.941892147064209,
14
+ "learning_rate": 2.4e-05,
15
+ "loss": 1.8353,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.08,
20
+ "grad_norm": 6.511570453643799,
21
+ "learning_rate": 4.8e-05,
22
+ "loss": 1.7037,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.12,
27
+ "grad_norm": 5.7441301345825195,
28
+ "learning_rate": 5.8666666666666665e-05,
29
+ "loss": 1.4799,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.16,
34
+ "grad_norm": 6.4645514488220215,
35
+ "learning_rate": 5.6e-05,
36
+ "loss": 1.2797,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.2,
41
+ "grad_norm": 6.132994651794434,
42
+ "learning_rate": 5.333333333333333e-05,
43
+ "loss": 1.1603,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.24,
48
+ "grad_norm": 4.506332874298096,
49
+ "learning_rate": 5.066666666666667e-05,
50
+ "loss": 0.9284,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.252,
55
+ "eval_accuracy": 0.5151515151515151,
56
+ "eval_loss": 0.9844135046005249,
57
+ "eval_runtime": 60.7277,
58
+ "eval_samples_per_second": 1.087,
59
+ "eval_steps_per_second": 0.082,
60
+ "step": 63
61
+ },
62
+ {
63
+ "epoch": 1.028,
64
+ "grad_norm": 6.4644880294799805,
65
+ "learning_rate": 4.8e-05,
66
+ "loss": 0.918,
67
+ "step": 70
68
+ },
69
+ {
70
+ "epoch": 1.068,
71
+ "grad_norm": 10.729351997375488,
72
+ "learning_rate": 4.5333333333333335e-05,
73
+ "loss": 0.9294,
74
+ "step": 80
75
+ },
76
+ {
77
+ "epoch": 1.108,
78
+ "grad_norm": 8.79098129272461,
79
+ "learning_rate": 4.266666666666667e-05,
80
+ "loss": 0.9386,
81
+ "step": 90
82
+ },
83
+ {
84
+ "epoch": 1.148,
85
+ "grad_norm": 4.564964294433594,
86
+ "learning_rate": 3.9999999999999996e-05,
87
+ "loss": 1.0353,
88
+ "step": 100
89
+ },
90
+ {
91
+ "epoch": 1.188,
92
+ "grad_norm": 6.161099433898926,
93
+ "learning_rate": 3.733333333333334e-05,
94
+ "loss": 0.9968,
95
+ "step": 110
96
+ },
97
+ {
98
+ "epoch": 1.228,
99
+ "grad_norm": 4.650356292724609,
100
+ "learning_rate": 3.4666666666666665e-05,
101
+ "loss": 0.9402,
102
+ "step": 120
103
+ },
104
+ {
105
+ "epoch": 1.252,
106
+ "eval_accuracy": 0.6818181818181818,
107
+ "eval_loss": 0.9521052241325378,
108
+ "eval_runtime": 62.8381,
109
+ "eval_samples_per_second": 1.05,
110
+ "eval_steps_per_second": 0.08,
111
+ "step": 126
112
+ },
113
+ {
114
+ "epoch": 2.016,
115
+ "grad_norm": 3.565009117126465,
116
+ "learning_rate": 3.2e-05,
117
+ "loss": 0.8584,
118
+ "step": 130
119
+ },
120
+ {
121
+ "epoch": 2.056,
122
+ "grad_norm": 5.3584489822387695,
123
+ "learning_rate": 2.9333333333333333e-05,
124
+ "loss": 0.8446,
125
+ "step": 140
126
+ },
127
+ {
128
+ "epoch": 2.096,
129
+ "grad_norm": 9.390802383422852,
130
+ "learning_rate": 2.6666666666666667e-05,
131
+ "loss": 0.8279,
132
+ "step": 150
133
+ },
134
+ {
135
+ "epoch": 2.136,
136
+ "grad_norm": 7.512342929840088,
137
+ "learning_rate": 2.4e-05,
138
+ "loss": 0.8049,
139
+ "step": 160
140
+ },
141
+ {
142
+ "epoch": 2.176,
143
+ "grad_norm": 4.40623664855957,
144
+ "learning_rate": 2.1333333333333335e-05,
145
+ "loss": 0.8291,
146
+ "step": 170
147
+ },
148
+ {
149
+ "epoch": 2.216,
150
+ "grad_norm": 6.4893059730529785,
151
+ "learning_rate": 1.866666666666667e-05,
152
+ "loss": 0.8639,
153
+ "step": 180
154
+ },
155
+ {
156
+ "epoch": 2.252,
157
+ "eval_accuracy": 0.6060606060606061,
158
+ "eval_loss": 0.7931472063064575,
159
+ "eval_runtime": 63.7339,
160
+ "eval_samples_per_second": 1.036,
161
+ "eval_steps_per_second": 0.078,
162
+ "step": 189
163
+ },
164
+ {
165
+ "epoch": 3.004,
166
+ "grad_norm": 5.078976154327393,
167
+ "learning_rate": 1.6e-05,
168
+ "loss": 0.7345,
169
+ "step": 190
170
+ },
171
+ {
172
+ "epoch": 3.044,
173
+ "grad_norm": 2.724031925201416,
174
+ "learning_rate": 1.3333333333333333e-05,
175
+ "loss": 0.7462,
176
+ "step": 200
177
+ },
178
+ {
179
+ "epoch": 3.084,
180
+ "grad_norm": 3.8141984939575195,
181
+ "learning_rate": 1.0666666666666667e-05,
182
+ "loss": 0.7004,
183
+ "step": 210
184
+ },
185
+ {
186
+ "epoch": 3.124,
187
+ "grad_norm": 3.8294148445129395,
188
+ "learning_rate": 8e-06,
189
+ "loss": 0.7095,
190
+ "step": 220
191
+ },
192
+ {
193
+ "epoch": 3.164,
194
+ "grad_norm": 5.064866065979004,
195
+ "learning_rate": 5.333333333333334e-06,
196
+ "loss": 0.7274,
197
+ "step": 230
198
+ },
199
+ {
200
+ "epoch": 3.204,
201
+ "grad_norm": 3.320218801498413,
202
+ "learning_rate": 2.666666666666667e-06,
203
+ "loss": 0.6674,
204
+ "step": 240
205
+ },
206
+ {
207
+ "epoch": 3.2439999999999998,
208
+ "grad_norm": 3.968177080154419,
209
+ "learning_rate": 0.0,
210
+ "loss": 0.6195,
211
+ "step": 250
212
+ },
213
+ {
214
+ "epoch": 3.2439999999999998,
215
+ "eval_accuracy": 0.6363636363636364,
216
+ "eval_loss": 0.910934329032898,
217
+ "eval_runtime": 73.6738,
218
+ "eval_samples_per_second": 0.896,
219
+ "eval_steps_per_second": 0.068,
220
+ "step": 250
221
+ },
222
+ {
223
+ "epoch": 3.2439999999999998,
224
+ "step": 250,
225
+ "total_flos": 4.9769595049134e+18,
226
+ "train_loss": 0.9631701583862304,
227
+ "train_runtime": 5330.9549,
228
+ "train_samples_per_second": 0.75,
229
+ "train_steps_per_second": 0.047
230
+ },
231
+ {
232
+ "epoch": 3.2439999999999998,
233
+ "eval_accuracy": 0.7586206896551724,
234
+ "eval_loss": 0.8059645295143127,
235
+ "eval_runtime": 63.0154,
236
+ "eval_samples_per_second": 0.92,
237
+ "eval_steps_per_second": 0.063,
238
+ "step": 250
239
+ },
240
+ {
241
+ "epoch": 3.2439999999999998,
242
+ "eval_accuracy": 0.7586206896551724,
243
+ "eval_loss": 0.8059644103050232,
244
+ "eval_runtime": 62.6184,
245
+ "eval_samples_per_second": 0.926,
246
+ "eval_steps_per_second": 0.064,
247
+ "step": 250
248
+ }
249
+ ],
250
+ "logging_steps": 10,
251
+ "max_steps": 250,
252
+ "num_input_tokens_seen": 0,
253
+ "num_train_epochs": 9223372036854775807,
254
+ "save_steps": 500,
255
+ "stateful_callbacks": {
256
+ "TrainerControl": {
257
+ "args": {
258
+ "should_epoch_stop": false,
259
+ "should_evaluate": false,
260
+ "should_log": false,
261
+ "should_save": true,
262
+ "should_training_stop": true
263
+ },
264
+ "attributes": {}
265
+ }
266
+ },
267
+ "total_flos": 4.9769595049134e+18,
268
+ "train_batch_size": 16,
269
+ "trial_name": null,
270
+ "trial_params": null
271
+ }