Coyoteranger commited on
Commit
4952d9a
·
verified ·
1 Parent(s): cfe5488

Upload 28 files

Browse files
Files changed (28) hide show
  1. flutter_codegen_model/checkpoint-1000/added_tokens.json +40 -0
  2. flutter_codegen_model/checkpoint-1000/config.json +42 -0
  3. flutter_codegen_model/checkpoint-1000/generation_config.json +6 -0
  4. flutter_codegen_model/checkpoint-1000/merges.txt +0 -0
  5. flutter_codegen_model/checkpoint-1000/model.safetensors +3 -0
  6. flutter_codegen_model/checkpoint-1000/optimizer.pt +3 -0
  7. flutter_codegen_model/checkpoint-1000/rng_state.pth +3 -0
  8. flutter_codegen_model/checkpoint-1000/scheduler.pt +3 -0
  9. flutter_codegen_model/checkpoint-1000/special_tokens_map.json +24 -0
  10. flutter_codegen_model/checkpoint-1000/tokenizer.json +0 -0
  11. flutter_codegen_model/checkpoint-1000/tokenizer_config.json +332 -0
  12. flutter_codegen_model/checkpoint-1000/trainer_state.json +733 -0
  13. flutter_codegen_model/checkpoint-1000/training_args.bin +3 -0
  14. flutter_codegen_model/checkpoint-1000/vocab.json +0 -0
  15. flutter_codegen_model/checkpoint-1500/added_tokens.json +40 -0
  16. flutter_codegen_model/checkpoint-1500/config.json +42 -0
  17. flutter_codegen_model/checkpoint-1500/generation_config.json +6 -0
  18. flutter_codegen_model/checkpoint-1500/merges.txt +0 -0
  19. flutter_codegen_model/checkpoint-1500/model.safetensors +3 -0
  20. flutter_codegen_model/checkpoint-1500/optimizer.pt +3 -0
  21. flutter_codegen_model/checkpoint-1500/rng_state.pth +3 -0
  22. flutter_codegen_model/checkpoint-1500/scheduler.pt +3 -0
  23. flutter_codegen_model/checkpoint-1500/special_tokens_map.json +24 -0
  24. flutter_codegen_model/checkpoint-1500/tokenizer.json +0 -0
  25. flutter_codegen_model/checkpoint-1500/tokenizer_config.json +332 -0
  26. flutter_codegen_model/checkpoint-1500/trainer_state.json +1083 -0
  27. flutter_codegen_model/checkpoint-1500/training_args.bin +3 -0
  28. flutter_codegen_model/checkpoint-1500/vocab.json +0 -0
flutter_codegen_model/checkpoint-1000/added_tokens.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "\t\t": 50294,
3
+ "\t\t\t": 50293,
4
+ "\t\t\t\t": 50292,
5
+ "\t\t\t\t\t": 50291,
6
+ "\t\t\t\t\t\t": 50290,
7
+ "\t\t\t\t\t\t\t": 50289,
8
+ "\t\t\t\t\t\t\t\t": 50288,
9
+ "\t\t\t\t\t\t\t\t\t": 50287,
10
+ " ": 50286,
11
+ " ": 50285,
12
+ " ": 50284,
13
+ " ": 50283,
14
+ " ": 50282,
15
+ " ": 50281,
16
+ " ": 50280,
17
+ " ": 50279,
18
+ " ": 50278,
19
+ " ": 50277,
20
+ " ": 50276,
21
+ " ": 50275,
22
+ " ": 50274,
23
+ " ": 50273,
24
+ " ": 50272,
25
+ " ": 50271,
26
+ " ": 50270,
27
+ " ": 50269,
28
+ " ": 50268,
29
+ " ": 50267,
30
+ " ": 50266,
31
+ " ": 50265,
32
+ " ": 50264,
33
+ " ": 50263,
34
+ " ": 50262,
35
+ " ": 50261,
36
+ " ": 50260,
37
+ " ": 50259,
38
+ " ": 50258,
39
+ " ": 50257
40
+ }
flutter_codegen_model/checkpoint-1000/config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./flutter_codegen_model/checkpoint-500",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "CodeGenForCausalLM"
6
+ ],
7
+ "attn_pdrop": 0.0,
8
+ "bos_token_id": 1,
9
+ "embd_pdrop": 0.0,
10
+ "eos_token_id": 50256,
11
+ "gradient_checkpointing": false,
12
+ "initializer_range": 0.02,
13
+ "layer_norm_epsilon": 1e-05,
14
+ "model_type": "codegen",
15
+ "n_ctx": 2048,
16
+ "n_embd": 1024,
17
+ "n_head": 16,
18
+ "n_inner": null,
19
+ "n_layer": 20,
20
+ "n_positions": 2048,
21
+ "resid_pdrop": 0.0,
22
+ "rotary_dim": 32,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50,
33
+ "temperature": 1.0
34
+ }
35
+ },
36
+ "tie_word_embeddings": false,
37
+ "tokenizer_class": "GPT2Tokenizer",
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.46.3",
40
+ "use_cache": true,
41
+ "vocab_size": 51200
42
+ }
flutter_codegen_model/checkpoint-1000/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.46.3"
6
+ }
flutter_codegen_model/checkpoint-1000/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
flutter_codegen_model/checkpoint-1000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d9efdb939975a0509d94514d5c96aa9cd240ba89a1291dfb841be51cfa3820e
3
+ size 1426867192
flutter_codegen_model/checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a808c42810a9dd898f9671ea318574e8916c1b02bb273fad82a0217ca81c4223
3
+ size 2853838010
flutter_codegen_model/checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdf4d700cdd7ef81bf005d2842e2f669e1810471cfce670ab79872c6ee698af7
3
+ size 14244
flutter_codegen_model/checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67e662de0fdf218dd168de2e72a056a2eef909476d429619e5d83f10376e401f
3
+ size 1064
flutter_codegen_model/checkpoint-1000/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
flutter_codegen_model/checkpoint-1000/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
flutter_codegen_model/checkpoint-1000/tokenizer_config.json ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "50257": {
13
+ "content": " ",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": false
19
+ },
20
+ "50258": {
21
+ "content": " ",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": false
27
+ },
28
+ "50259": {
29
+ "content": " ",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": false
35
+ },
36
+ "50260": {
37
+ "content": " ",
38
+ "lstrip": false,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": false
43
+ },
44
+ "50261": {
45
+ "content": " ",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": false
51
+ },
52
+ "50262": {
53
+ "content": " ",
54
+ "lstrip": false,
55
+ "normalized": true,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": false
59
+ },
60
+ "50263": {
61
+ "content": " ",
62
+ "lstrip": false,
63
+ "normalized": true,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": false
67
+ },
68
+ "50264": {
69
+ "content": " ",
70
+ "lstrip": false,
71
+ "normalized": true,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": false
75
+ },
76
+ "50265": {
77
+ "content": " ",
78
+ "lstrip": false,
79
+ "normalized": true,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": false
83
+ },
84
+ "50266": {
85
+ "content": " ",
86
+ "lstrip": false,
87
+ "normalized": true,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": false
91
+ },
92
+ "50267": {
93
+ "content": " ",
94
+ "lstrip": false,
95
+ "normalized": true,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": false
99
+ },
100
+ "50268": {
101
+ "content": " ",
102
+ "lstrip": false,
103
+ "normalized": true,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": false
107
+ },
108
+ "50269": {
109
+ "content": " ",
110
+ "lstrip": false,
111
+ "normalized": true,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": false
115
+ },
116
+ "50270": {
117
+ "content": " ",
118
+ "lstrip": false,
119
+ "normalized": true,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": false
123
+ },
124
+ "50271": {
125
+ "content": " ",
126
+ "lstrip": false,
127
+ "normalized": true,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": false
131
+ },
132
+ "50272": {
133
+ "content": " ",
134
+ "lstrip": false,
135
+ "normalized": true,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": false
139
+ },
140
+ "50273": {
141
+ "content": " ",
142
+ "lstrip": false,
143
+ "normalized": true,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": false
147
+ },
148
+ "50274": {
149
+ "content": " ",
150
+ "lstrip": false,
151
+ "normalized": true,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": false
155
+ },
156
+ "50275": {
157
+ "content": " ",
158
+ "lstrip": false,
159
+ "normalized": true,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": false
163
+ },
164
+ "50276": {
165
+ "content": " ",
166
+ "lstrip": false,
167
+ "normalized": true,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": false
171
+ },
172
+ "50277": {
173
+ "content": " ",
174
+ "lstrip": false,
175
+ "normalized": true,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": false
179
+ },
180
+ "50278": {
181
+ "content": " ",
182
+ "lstrip": false,
183
+ "normalized": true,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": false
187
+ },
188
+ "50279": {
189
+ "content": " ",
190
+ "lstrip": false,
191
+ "normalized": true,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": false
195
+ },
196
+ "50280": {
197
+ "content": " ",
198
+ "lstrip": false,
199
+ "normalized": true,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": false
203
+ },
204
+ "50281": {
205
+ "content": " ",
206
+ "lstrip": false,
207
+ "normalized": true,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": false
211
+ },
212
+ "50282": {
213
+ "content": " ",
214
+ "lstrip": false,
215
+ "normalized": true,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": false
219
+ },
220
+ "50283": {
221
+ "content": " ",
222
+ "lstrip": false,
223
+ "normalized": true,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": false
227
+ },
228
+ "50284": {
229
+ "content": " ",
230
+ "lstrip": false,
231
+ "normalized": true,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": false
235
+ },
236
+ "50285": {
237
+ "content": " ",
238
+ "lstrip": false,
239
+ "normalized": true,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": false
243
+ },
244
+ "50286": {
245
+ "content": " ",
246
+ "lstrip": false,
247
+ "normalized": true,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": false
251
+ },
252
+ "50287": {
253
+ "content": "\t\t\t\t\t\t\t\t\t",
254
+ "lstrip": false,
255
+ "normalized": true,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": false
259
+ },
260
+ "50288": {
261
+ "content": "\t\t\t\t\t\t\t\t",
262
+ "lstrip": false,
263
+ "normalized": true,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": false
267
+ },
268
+ "50289": {
269
+ "content": "\t\t\t\t\t\t\t",
270
+ "lstrip": false,
271
+ "normalized": true,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": false
275
+ },
276
+ "50290": {
277
+ "content": "\t\t\t\t\t\t",
278
+ "lstrip": false,
279
+ "normalized": true,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": false
283
+ },
284
+ "50291": {
285
+ "content": "\t\t\t\t\t",
286
+ "lstrip": false,
287
+ "normalized": true,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": false
291
+ },
292
+ "50292": {
293
+ "content": "\t\t\t\t",
294
+ "lstrip": false,
295
+ "normalized": true,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": false
299
+ },
300
+ "50293": {
301
+ "content": "\t\t\t",
302
+ "lstrip": false,
303
+ "normalized": true,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": false
307
+ },
308
+ "50294": {
309
+ "content": "\t\t",
310
+ "lstrip": false,
311
+ "normalized": true,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": false
315
+ }
316
+ },
317
+ "bos_token": "<|endoftext|>",
318
+ "clean_up_tokenization_spaces": false,
319
+ "eos_token": "<|endoftext|>",
320
+ "max_length": 512,
321
+ "model_max_length": 2048,
322
+ "pad_to_multiple_of": null,
323
+ "pad_token": "<|endoftext|>",
324
+ "pad_token_type_id": 0,
325
+ "padding_side": "right",
326
+ "return_token_type_ids": false,
327
+ "stride": 0,
328
+ "tokenizer_class": "CodeGenTokenizer",
329
+ "truncation_side": "right",
330
+ "truncation_strategy": "longest_first",
331
+ "unk_token": "<|endoftext|>"
332
+ }
flutter_codegen_model/checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,733 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.02620407735443635,
5
+ "eval_steps": 500,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0002620407735443635,
13
+ "grad_norm": 1.2820378541946411,
14
+ "learning_rate": 4.999563265377426e-05,
15
+ "loss": 0.6609,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.000524081547088727,
20
+ "grad_norm": 1.3106062412261963,
21
+ "learning_rate": 4.999126530754852e-05,
22
+ "loss": 0.8066,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.0007861223206330905,
27
+ "grad_norm": 1.6985974311828613,
28
+ "learning_rate": 4.9986897961322784e-05,
29
+ "loss": 0.6863,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.001048163094177454,
34
+ "grad_norm": 1.2552741765975952,
35
+ "learning_rate": 4.9982530615097044e-05,
36
+ "loss": 0.5674,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.0013102038677218176,
41
+ "grad_norm": 1.3928155899047852,
42
+ "learning_rate": 4.9978163268871304e-05,
43
+ "loss": 0.5992,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.001572244641266181,
48
+ "grad_norm": 1.5794411897659302,
49
+ "learning_rate": 4.9973795922645565e-05,
50
+ "loss": 0.7939,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.0018342854148105446,
55
+ "grad_norm": 1.4024512767791748,
56
+ "learning_rate": 4.9969428576419825e-05,
57
+ "loss": 0.6178,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.002096326188354908,
62
+ "grad_norm": 1.2232664823532104,
63
+ "learning_rate": 4.9965061230194085e-05,
64
+ "loss": 0.7544,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.0023583669618992714,
69
+ "grad_norm": 1.5368870496749878,
70
+ "learning_rate": 4.9960693883968346e-05,
71
+ "loss": 0.6645,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.002620407735443635,
76
+ "grad_norm": 1.6177372932434082,
77
+ "learning_rate": 4.9956326537742606e-05,
78
+ "loss": 0.6329,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.0028824485089879986,
83
+ "grad_norm": 1.3803173303604126,
84
+ "learning_rate": 4.995195919151687e-05,
85
+ "loss": 0.7114,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.003144489282532362,
90
+ "grad_norm": 1.5191670656204224,
91
+ "learning_rate": 4.9947591845291134e-05,
92
+ "loss": 0.8717,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.0034065300560767254,
97
+ "grad_norm": 1.2967548370361328,
98
+ "learning_rate": 4.994322449906539e-05,
99
+ "loss": 0.7618,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.003668570829621089,
104
+ "grad_norm": 1.8742738962173462,
105
+ "learning_rate": 4.9938857152839655e-05,
106
+ "loss": 0.5896,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.003930611603165452,
111
+ "grad_norm": 1.697966456413269,
112
+ "learning_rate": 4.993448980661391e-05,
113
+ "loss": 0.7493,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.004192652376709816,
118
+ "grad_norm": 1.5282775163650513,
119
+ "learning_rate": 4.9930122460388175e-05,
120
+ "loss": 0.6359,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.00445469315025418,
125
+ "grad_norm": 1.2776225805282593,
126
+ "learning_rate": 4.992575511416243e-05,
127
+ "loss": 0.7677,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.004716733923798543,
132
+ "grad_norm": 1.2036848068237305,
133
+ "learning_rate": 4.9921387767936696e-05,
134
+ "loss": 0.6234,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.0049787746973429066,
139
+ "grad_norm": 1.1325912475585938,
140
+ "learning_rate": 4.991702042171095e-05,
141
+ "loss": 0.624,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.00524081547088727,
146
+ "grad_norm": 1.3846409320831299,
147
+ "learning_rate": 4.991265307548522e-05,
148
+ "loss": 0.5979,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.005502856244431633,
153
+ "grad_norm": 1.9792439937591553,
154
+ "learning_rate": 4.990828572925948e-05,
155
+ "loss": 0.7897,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.005764897017975997,
160
+ "grad_norm": 1.8546253442764282,
161
+ "learning_rate": 4.990391838303374e-05,
162
+ "loss": 0.7035,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.006026937791520361,
167
+ "grad_norm": 1.5434975624084473,
168
+ "learning_rate": 4.9899551036808e-05,
169
+ "loss": 0.7092,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.006288978565064724,
174
+ "grad_norm": 1.0328209400177002,
175
+ "learning_rate": 4.989518369058226e-05,
176
+ "loss": 0.6239,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.006551019338609088,
181
+ "grad_norm": 0.9389006495475769,
182
+ "learning_rate": 4.989081634435652e-05,
183
+ "loss": 0.6957,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.006813060112153451,
188
+ "grad_norm": 1.0274962186813354,
189
+ "learning_rate": 4.988644899813078e-05,
190
+ "loss": 0.5302,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.0070751008856978145,
195
+ "grad_norm": 1.2626285552978516,
196
+ "learning_rate": 4.988208165190504e-05,
197
+ "loss": 0.6541,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.007337141659242178,
202
+ "grad_norm": 1.4558316469192505,
203
+ "learning_rate": 4.98777143056793e-05,
204
+ "loss": 0.7284,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.007599182432786541,
209
+ "grad_norm": 1.3997328281402588,
210
+ "learning_rate": 4.987334695945356e-05,
211
+ "loss": 0.6888,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 0.007861223206330904,
216
+ "grad_norm": 1.7742432355880737,
217
+ "learning_rate": 4.986897961322782e-05,
218
+ "loss": 0.6869,
219
+ "step": 300
220
+ },
221
+ {
222
+ "epoch": 0.008123263979875268,
223
+ "grad_norm": 1.4925923347473145,
224
+ "learning_rate": 4.986461226700208e-05,
225
+ "loss": 0.731,
226
+ "step": 310
227
+ },
228
+ {
229
+ "epoch": 0.008385304753419632,
230
+ "grad_norm": 1.5941082239151,
231
+ "learning_rate": 4.986024492077634e-05,
232
+ "loss": 0.7149,
233
+ "step": 320
234
+ },
235
+ {
236
+ "epoch": 0.008647345526963996,
237
+ "grad_norm": 1.507450819015503,
238
+ "learning_rate": 4.98558775745506e-05,
239
+ "loss": 0.6443,
240
+ "step": 330
241
+ },
242
+ {
243
+ "epoch": 0.00890938630050836,
244
+ "grad_norm": 0.9866214990615845,
245
+ "learning_rate": 4.985151022832487e-05,
246
+ "loss": 0.6003,
247
+ "step": 340
248
+ },
249
+ {
250
+ "epoch": 0.009171427074052723,
251
+ "grad_norm": 0.9682250022888184,
252
+ "learning_rate": 4.984714288209912e-05,
253
+ "loss": 0.6602,
254
+ "step": 350
255
+ },
256
+ {
257
+ "epoch": 0.009433467847597085,
258
+ "grad_norm": 1.7567181587219238,
259
+ "learning_rate": 4.984277553587338e-05,
260
+ "loss": 0.5647,
261
+ "step": 360
262
+ },
263
+ {
264
+ "epoch": 0.00969550862114145,
265
+ "grad_norm": 1.4297257661819458,
266
+ "learning_rate": 4.9838408189647643e-05,
267
+ "loss": 0.6628,
268
+ "step": 370
269
+ },
270
+ {
271
+ "epoch": 0.009957549394685813,
272
+ "grad_norm": 1.515763521194458,
273
+ "learning_rate": 4.9834040843421904e-05,
274
+ "loss": 0.568,
275
+ "step": 380
276
+ },
277
+ {
278
+ "epoch": 0.010219590168230177,
279
+ "grad_norm": 1.1806342601776123,
280
+ "learning_rate": 4.9829673497196164e-05,
281
+ "loss": 0.6729,
282
+ "step": 390
283
+ },
284
+ {
285
+ "epoch": 0.01048163094177454,
286
+ "grad_norm": 1.1328020095825195,
287
+ "learning_rate": 4.9825306150970425e-05,
288
+ "loss": 0.6963,
289
+ "step": 400
290
+ },
291
+ {
292
+ "epoch": 0.010743671715318903,
293
+ "grad_norm": 0.7740004658699036,
294
+ "learning_rate": 4.9820938804744685e-05,
295
+ "loss": 0.6654,
296
+ "step": 410
297
+ },
298
+ {
299
+ "epoch": 0.011005712488863267,
300
+ "grad_norm": 0.9519413113594055,
301
+ "learning_rate": 4.9816571458518945e-05,
302
+ "loss": 0.6487,
303
+ "step": 420
304
+ },
305
+ {
306
+ "epoch": 0.01126775326240763,
307
+ "grad_norm": 0.8964847922325134,
308
+ "learning_rate": 4.981220411229321e-05,
309
+ "loss": 0.5667,
310
+ "step": 430
311
+ },
312
+ {
313
+ "epoch": 0.011529794035951994,
314
+ "grad_norm": 1.428072452545166,
315
+ "learning_rate": 4.9807836766067466e-05,
316
+ "loss": 0.8164,
317
+ "step": 440
318
+ },
319
+ {
320
+ "epoch": 0.011791834809496358,
321
+ "grad_norm": 1.4375147819519043,
322
+ "learning_rate": 4.980346941984173e-05,
323
+ "loss": 0.5476,
324
+ "step": 450
325
+ },
326
+ {
327
+ "epoch": 0.012053875583040722,
328
+ "grad_norm": 1.1702146530151367,
329
+ "learning_rate": 4.979910207361599e-05,
330
+ "loss": 0.7342,
331
+ "step": 460
332
+ },
333
+ {
334
+ "epoch": 0.012315916356585084,
335
+ "grad_norm": 1.2703320980072021,
336
+ "learning_rate": 4.9794734727390254e-05,
337
+ "loss": 0.5767,
338
+ "step": 470
339
+ },
340
+ {
341
+ "epoch": 0.012577957130129448,
342
+ "grad_norm": 1.2520267963409424,
343
+ "learning_rate": 4.979036738116451e-05,
344
+ "loss": 0.5969,
345
+ "step": 480
346
+ },
347
+ {
348
+ "epoch": 0.012839997903673812,
349
+ "grad_norm": 1.413979172706604,
350
+ "learning_rate": 4.9786000034938775e-05,
351
+ "loss": 0.7011,
352
+ "step": 490
353
+ },
354
+ {
355
+ "epoch": 0.013102038677218176,
356
+ "grad_norm": 1.3918565511703491,
357
+ "learning_rate": 4.978163268871303e-05,
358
+ "loss": 0.6041,
359
+ "step": 500
360
+ },
361
+ {
362
+ "epoch": 0.01336407945076254,
363
+ "grad_norm": 0.9175894260406494,
364
+ "learning_rate": 4.9777265342487296e-05,
365
+ "loss": 0.5052,
366
+ "step": 510
367
+ },
368
+ {
369
+ "epoch": 0.013626120224306901,
370
+ "grad_norm": 1.296505331993103,
371
+ "learning_rate": 4.9772897996261556e-05,
372
+ "loss": 0.6076,
373
+ "step": 520
374
+ },
375
+ {
376
+ "epoch": 0.013888160997851265,
377
+ "grad_norm": 1.2490183115005493,
378
+ "learning_rate": 4.9768530650035816e-05,
379
+ "loss": 0.6287,
380
+ "step": 530
381
+ },
382
+ {
383
+ "epoch": 0.014150201771395629,
384
+ "grad_norm": 1.398285984992981,
385
+ "learning_rate": 4.976416330381008e-05,
386
+ "loss": 0.7905,
387
+ "step": 540
388
+ },
389
+ {
390
+ "epoch": 0.014412242544939993,
391
+ "grad_norm": 1.3094829320907593,
392
+ "learning_rate": 4.975979595758434e-05,
393
+ "loss": 0.5133,
394
+ "step": 550
395
+ },
396
+ {
397
+ "epoch": 0.014674283318484357,
398
+ "grad_norm": 1.7128199338912964,
399
+ "learning_rate": 4.97554286113586e-05,
400
+ "loss": 0.824,
401
+ "step": 560
402
+ },
403
+ {
404
+ "epoch": 0.014936324092028719,
405
+ "grad_norm": 1.1319103240966797,
406
+ "learning_rate": 4.975106126513286e-05,
407
+ "loss": 0.5893,
408
+ "step": 570
409
+ },
410
+ {
411
+ "epoch": 0.015198364865573083,
412
+ "grad_norm": 1.6176029443740845,
413
+ "learning_rate": 4.974669391890712e-05,
414
+ "loss": 0.581,
415
+ "step": 580
416
+ },
417
+ {
418
+ "epoch": 0.015460405639117446,
419
+ "grad_norm": 1.590836524963379,
420
+ "learning_rate": 4.974232657268138e-05,
421
+ "loss": 0.5814,
422
+ "step": 590
423
+ },
424
+ {
425
+ "epoch": 0.01572244641266181,
426
+ "grad_norm": 1.6922227144241333,
427
+ "learning_rate": 4.973795922645564e-05,
428
+ "loss": 0.5917,
429
+ "step": 600
430
+ },
431
+ {
432
+ "epoch": 0.015984487186206174,
433
+ "grad_norm": 1.3479337692260742,
434
+ "learning_rate": 4.97335918802299e-05,
435
+ "loss": 0.7356,
436
+ "step": 610
437
+ },
438
+ {
439
+ "epoch": 0.016246527959750536,
440
+ "grad_norm": 2.2019124031066895,
441
+ "learning_rate": 4.972922453400416e-05,
442
+ "loss": 0.6197,
443
+ "step": 620
444
+ },
445
+ {
446
+ "epoch": 0.016508568733294902,
447
+ "grad_norm": 1.6211423873901367,
448
+ "learning_rate": 4.972485718777842e-05,
449
+ "loss": 0.6299,
450
+ "step": 630
451
+ },
452
+ {
453
+ "epoch": 0.016770609506839264,
454
+ "grad_norm": 1.157416582107544,
455
+ "learning_rate": 4.972048984155268e-05,
456
+ "loss": 0.6343,
457
+ "step": 640
458
+ },
459
+ {
460
+ "epoch": 0.01703265028038363,
461
+ "grad_norm": 1.2576712369918823,
462
+ "learning_rate": 4.971612249532695e-05,
463
+ "loss": 0.6126,
464
+ "step": 650
465
+ },
466
+ {
467
+ "epoch": 0.01729469105392799,
468
+ "grad_norm": 1.3852715492248535,
469
+ "learning_rate": 4.97117551491012e-05,
470
+ "loss": 0.6352,
471
+ "step": 660
472
+ },
473
+ {
474
+ "epoch": 0.017556731827472354,
475
+ "grad_norm": 1.0178048610687256,
476
+ "learning_rate": 4.970738780287547e-05,
477
+ "loss": 0.5923,
478
+ "step": 670
479
+ },
480
+ {
481
+ "epoch": 0.01781877260101672,
482
+ "grad_norm": 0.8760583996772766,
483
+ "learning_rate": 4.970302045664972e-05,
484
+ "loss": 0.6158,
485
+ "step": 680
486
+ },
487
+ {
488
+ "epoch": 0.01808081337456108,
489
+ "grad_norm": 0.8956984281539917,
490
+ "learning_rate": 4.969865311042398e-05,
491
+ "loss": 0.5746,
492
+ "step": 690
493
+ },
494
+ {
495
+ "epoch": 0.018342854148105447,
496
+ "grad_norm": 1.1126501560211182,
497
+ "learning_rate": 4.969428576419824e-05,
498
+ "loss": 0.6254,
499
+ "step": 700
500
+ },
501
+ {
502
+ "epoch": 0.01860489492164981,
503
+ "grad_norm": 1.168455958366394,
504
+ "learning_rate": 4.96899184179725e-05,
505
+ "loss": 0.652,
506
+ "step": 710
507
+ },
508
+ {
509
+ "epoch": 0.01886693569519417,
510
+ "grad_norm": 1.3628567457199097,
511
+ "learning_rate": 4.9685551071746764e-05,
512
+ "loss": 0.5789,
513
+ "step": 720
514
+ },
515
+ {
516
+ "epoch": 0.019128976468738536,
517
+ "grad_norm": 1.1971865892410278,
518
+ "learning_rate": 4.9681183725521024e-05,
519
+ "loss": 0.6408,
520
+ "step": 730
521
+ },
522
+ {
523
+ "epoch": 0.0193910172422829,
524
+ "grad_norm": 1.1916868686676025,
525
+ "learning_rate": 4.9676816379295285e-05,
526
+ "loss": 0.6461,
527
+ "step": 740
528
+ },
529
+ {
530
+ "epoch": 0.019653058015827264,
531
+ "grad_norm": 1.1797837018966675,
532
+ "learning_rate": 4.9672449033069545e-05,
533
+ "loss": 0.5843,
534
+ "step": 750
535
+ },
536
+ {
537
+ "epoch": 0.019915098789371626,
538
+ "grad_norm": 0.7941935658454895,
539
+ "learning_rate": 4.966808168684381e-05,
540
+ "loss": 0.6165,
541
+ "step": 760
542
+ },
543
+ {
544
+ "epoch": 0.02017713956291599,
545
+ "grad_norm": 1.4876329898834229,
546
+ "learning_rate": 4.9663714340618066e-05,
547
+ "loss": 0.6347,
548
+ "step": 770
549
+ },
550
+ {
551
+ "epoch": 0.020439180336460354,
552
+ "grad_norm": 1.1482038497924805,
553
+ "learning_rate": 4.965934699439233e-05,
554
+ "loss": 0.5662,
555
+ "step": 780
556
+ },
557
+ {
558
+ "epoch": 0.020701221110004716,
559
+ "grad_norm": 1.3942419290542603,
560
+ "learning_rate": 4.9654979648166586e-05,
561
+ "loss": 0.6189,
562
+ "step": 790
563
+ },
564
+ {
565
+ "epoch": 0.02096326188354908,
566
+ "grad_norm": 0.8826277256011963,
567
+ "learning_rate": 4.9650612301940854e-05,
568
+ "loss": 0.6801,
569
+ "step": 800
570
+ },
571
+ {
572
+ "epoch": 0.021225302657093444,
573
+ "grad_norm": 1.3729712963104248,
574
+ "learning_rate": 4.964624495571511e-05,
575
+ "loss": 0.5789,
576
+ "step": 810
577
+ },
578
+ {
579
+ "epoch": 0.021487343430637806,
580
+ "grad_norm": 0.747199296951294,
581
+ "learning_rate": 4.9641877609489374e-05,
582
+ "loss": 0.651,
583
+ "step": 820
584
+ },
585
+ {
586
+ "epoch": 0.02174938420418217,
587
+ "grad_norm": 0.7911145091056824,
588
+ "learning_rate": 4.963751026326363e-05,
589
+ "loss": 0.6834,
590
+ "step": 830
591
+ },
592
+ {
593
+ "epoch": 0.022011424977726533,
594
+ "grad_norm": 1.1725844144821167,
595
+ "learning_rate": 4.9633142917037895e-05,
596
+ "loss": 0.6687,
597
+ "step": 840
598
+ },
599
+ {
600
+ "epoch": 0.0222734657512709,
601
+ "grad_norm": 1.2759829759597778,
602
+ "learning_rate": 4.9628775570812156e-05,
603
+ "loss": 0.6612,
604
+ "step": 850
605
+ },
606
+ {
607
+ "epoch": 0.02253550652481526,
608
+ "grad_norm": 1.497684359550476,
609
+ "learning_rate": 4.9624408224586416e-05,
610
+ "loss": 0.686,
611
+ "step": 860
612
+ },
613
+ {
614
+ "epoch": 0.022797547298359623,
615
+ "grad_norm": 1.4431102275848389,
616
+ "learning_rate": 4.9620040878360676e-05,
617
+ "loss": 0.5838,
618
+ "step": 870
619
+ },
620
+ {
621
+ "epoch": 0.02305958807190399,
622
+ "grad_norm": 0.8864196538925171,
623
+ "learning_rate": 4.961567353213494e-05,
624
+ "loss": 0.6076,
625
+ "step": 880
626
+ },
627
+ {
628
+ "epoch": 0.02332162884544835,
629
+ "grad_norm": 1.4421597719192505,
630
+ "learning_rate": 4.96113061859092e-05,
631
+ "loss": 0.6669,
632
+ "step": 890
633
+ },
634
+ {
635
+ "epoch": 0.023583669618992716,
636
+ "grad_norm": 1.541601300239563,
637
+ "learning_rate": 4.960693883968346e-05,
638
+ "loss": 0.477,
639
+ "step": 900
640
+ },
641
+ {
642
+ "epoch": 0.02384571039253708,
643
+ "grad_norm": 1.0725853443145752,
644
+ "learning_rate": 4.960257149345772e-05,
645
+ "loss": 0.5399,
646
+ "step": 910
647
+ },
648
+ {
649
+ "epoch": 0.024107751166081444,
650
+ "grad_norm": 1.4579834938049316,
651
+ "learning_rate": 4.959820414723198e-05,
652
+ "loss": 0.636,
653
+ "step": 920
654
+ },
655
+ {
656
+ "epoch": 0.024369791939625806,
657
+ "grad_norm": 1.1018449068069458,
658
+ "learning_rate": 4.959383680100624e-05,
659
+ "loss": 0.7731,
660
+ "step": 930
661
+ },
662
+ {
663
+ "epoch": 0.024631832713170168,
664
+ "grad_norm": 1.3531861305236816,
665
+ "learning_rate": 4.95894694547805e-05,
666
+ "loss": 0.7017,
667
+ "step": 940
668
+ },
669
+ {
670
+ "epoch": 0.024893873486714534,
671
+ "grad_norm": 1.1225773096084595,
672
+ "learning_rate": 4.958510210855476e-05,
673
+ "loss": 0.7228,
674
+ "step": 950
675
+ },
676
+ {
677
+ "epoch": 0.025155914260258896,
678
+ "grad_norm": 0.6442508697509766,
679
+ "learning_rate": 4.958073476232902e-05,
680
+ "loss": 0.4421,
681
+ "step": 960
682
+ },
683
+ {
684
+ "epoch": 0.02541795503380326,
685
+ "grad_norm": 1.1338638067245483,
686
+ "learning_rate": 4.957636741610328e-05,
687
+ "loss": 0.6533,
688
+ "step": 970
689
+ },
690
+ {
691
+ "epoch": 0.025679995807347623,
692
+ "grad_norm": 1.0796573162078857,
693
+ "learning_rate": 4.957200006987755e-05,
694
+ "loss": 0.6407,
695
+ "step": 980
696
+ },
697
+ {
698
+ "epoch": 0.025942036580891985,
699
+ "grad_norm": 1.001578450202942,
700
+ "learning_rate": 4.95676327236518e-05,
701
+ "loss": 0.652,
702
+ "step": 990
703
+ },
704
+ {
705
+ "epoch": 0.02620407735443635,
706
+ "grad_norm": 1.521545648574829,
707
+ "learning_rate": 4.956326537742607e-05,
708
+ "loss": 0.4812,
709
+ "step": 1000
710
+ }
711
+ ],
712
+ "logging_steps": 10,
713
+ "max_steps": 114486,
714
+ "num_input_tokens_seen": 0,
715
+ "num_train_epochs": 3,
716
+ "save_steps": 500,
717
+ "stateful_callbacks": {
718
+ "TrainerControl": {
719
+ "args": {
720
+ "should_epoch_stop": false,
721
+ "should_evaluate": false,
722
+ "should_log": false,
723
+ "should_save": true,
724
+ "should_training_stop": false
725
+ },
726
+ "attributes": {}
727
+ }
728
+ },
729
+ "total_flos": 3739037466624000.0,
730
+ "train_batch_size": 4,
731
+ "trial_name": null,
732
+ "trial_params": null
733
+ }
flutter_codegen_model/checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da53cc9a018e467dc692093da1b9a59e8d739bedb689402e677cfe0810541434
3
+ size 5240
flutter_codegen_model/checkpoint-1000/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
flutter_codegen_model/checkpoint-1500/added_tokens.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "\t\t": 50294,
3
+ "\t\t\t": 50293,
4
+ "\t\t\t\t": 50292,
5
+ "\t\t\t\t\t": 50291,
6
+ "\t\t\t\t\t\t": 50290,
7
+ "\t\t\t\t\t\t\t": 50289,
8
+ "\t\t\t\t\t\t\t\t": 50288,
9
+ "\t\t\t\t\t\t\t\t\t": 50287,
10
+ " ": 50286,
11
+ " ": 50285,
12
+ " ": 50284,
13
+ " ": 50283,
14
+ " ": 50282,
15
+ " ": 50281,
16
+ " ": 50280,
17
+ " ": 50279,
18
+ " ": 50278,
19
+ " ": 50277,
20
+ " ": 50276,
21
+ " ": 50275,
22
+ " ": 50274,
23
+ " ": 50273,
24
+ " ": 50272,
25
+ " ": 50271,
26
+ " ": 50270,
27
+ " ": 50269,
28
+ " ": 50268,
29
+ " ": 50267,
30
+ " ": 50266,
31
+ " ": 50265,
32
+ " ": 50264,
33
+ " ": 50263,
34
+ " ": 50262,
35
+ " ": 50261,
36
+ " ": 50260,
37
+ " ": 50259,
38
+ " ": 50258,
39
+ " ": 50257
40
+ }
flutter_codegen_model/checkpoint-1500/config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./flutter_codegen_model/checkpoint-500",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "CodeGenForCausalLM"
6
+ ],
7
+ "attn_pdrop": 0.0,
8
+ "bos_token_id": 1,
9
+ "embd_pdrop": 0.0,
10
+ "eos_token_id": 50256,
11
+ "gradient_checkpointing": false,
12
+ "initializer_range": 0.02,
13
+ "layer_norm_epsilon": 1e-05,
14
+ "model_type": "codegen",
15
+ "n_ctx": 2048,
16
+ "n_embd": 1024,
17
+ "n_head": 16,
18
+ "n_inner": null,
19
+ "n_layer": 20,
20
+ "n_positions": 2048,
21
+ "resid_pdrop": 0.0,
22
+ "rotary_dim": 32,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50,
33
+ "temperature": 1.0
34
+ }
35
+ },
36
+ "tie_word_embeddings": false,
37
+ "tokenizer_class": "GPT2Tokenizer",
38
+ "torch_dtype": "float32",
39
+ "transformers_version": "4.46.3",
40
+ "use_cache": true,
41
+ "vocab_size": 51200
42
+ }
flutter_codegen_model/checkpoint-1500/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.46.3"
6
+ }
flutter_codegen_model/checkpoint-1500/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
flutter_codegen_model/checkpoint-1500/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78f591ad3269b4f50e1c9f87b6e2330de4fc1672522dae11bb362a2149ca1348
3
+ size 1426867192
flutter_codegen_model/checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7281fe66fc45a9b2b2bf349a6c0e97585d4372b417b53b819c4e4acaa0959571
3
+ size 2853838010
flutter_codegen_model/checkpoint-1500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdf4d700cdd7ef81bf005d2842e2f669e1810471cfce670ab79872c6ee698af7
3
+ size 14244
flutter_codegen_model/checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50e53cce2010b8d10917ee837774808926eef68f9dad453542a81370237a2b86
3
+ size 1064
flutter_codegen_model/checkpoint-1500/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<|endoftext|>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|endoftext|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<|endoftext|>",
17
+ "unk_token": {
18
+ "content": "<|endoftext|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
flutter_codegen_model/checkpoint-1500/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
flutter_codegen_model/checkpoint-1500/tokenizer_config.json ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "50256": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "50257": {
13
+ "content": " ",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": false
19
+ },
20
+ "50258": {
21
+ "content": " ",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": false
27
+ },
28
+ "50259": {
29
+ "content": " ",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": false
35
+ },
36
+ "50260": {
37
+ "content": " ",
38
+ "lstrip": false,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": false
43
+ },
44
+ "50261": {
45
+ "content": " ",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": false
51
+ },
52
+ "50262": {
53
+ "content": " ",
54
+ "lstrip": false,
55
+ "normalized": true,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": false
59
+ },
60
+ "50263": {
61
+ "content": " ",
62
+ "lstrip": false,
63
+ "normalized": true,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": false
67
+ },
68
+ "50264": {
69
+ "content": " ",
70
+ "lstrip": false,
71
+ "normalized": true,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": false
75
+ },
76
+ "50265": {
77
+ "content": " ",
78
+ "lstrip": false,
79
+ "normalized": true,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": false
83
+ },
84
+ "50266": {
85
+ "content": " ",
86
+ "lstrip": false,
87
+ "normalized": true,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": false
91
+ },
92
+ "50267": {
93
+ "content": " ",
94
+ "lstrip": false,
95
+ "normalized": true,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": false
99
+ },
100
+ "50268": {
101
+ "content": " ",
102
+ "lstrip": false,
103
+ "normalized": true,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": false
107
+ },
108
+ "50269": {
109
+ "content": " ",
110
+ "lstrip": false,
111
+ "normalized": true,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": false
115
+ },
116
+ "50270": {
117
+ "content": " ",
118
+ "lstrip": false,
119
+ "normalized": true,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": false
123
+ },
124
+ "50271": {
125
+ "content": " ",
126
+ "lstrip": false,
127
+ "normalized": true,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": false
131
+ },
132
+ "50272": {
133
+ "content": " ",
134
+ "lstrip": false,
135
+ "normalized": true,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": false
139
+ },
140
+ "50273": {
141
+ "content": " ",
142
+ "lstrip": false,
143
+ "normalized": true,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": false
147
+ },
148
+ "50274": {
149
+ "content": " ",
150
+ "lstrip": false,
151
+ "normalized": true,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": false
155
+ },
156
+ "50275": {
157
+ "content": " ",
158
+ "lstrip": false,
159
+ "normalized": true,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": false
163
+ },
164
+ "50276": {
165
+ "content": " ",
166
+ "lstrip": false,
167
+ "normalized": true,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": false
171
+ },
172
+ "50277": {
173
+ "content": " ",
174
+ "lstrip": false,
175
+ "normalized": true,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": false
179
+ },
180
+ "50278": {
181
+ "content": " ",
182
+ "lstrip": false,
183
+ "normalized": true,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": false
187
+ },
188
+ "50279": {
189
+ "content": " ",
190
+ "lstrip": false,
191
+ "normalized": true,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": false
195
+ },
196
+ "50280": {
197
+ "content": " ",
198
+ "lstrip": false,
199
+ "normalized": true,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": false
203
+ },
204
+ "50281": {
205
+ "content": " ",
206
+ "lstrip": false,
207
+ "normalized": true,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": false
211
+ },
212
+ "50282": {
213
+ "content": " ",
214
+ "lstrip": false,
215
+ "normalized": true,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": false
219
+ },
220
+ "50283": {
221
+ "content": " ",
222
+ "lstrip": false,
223
+ "normalized": true,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": false
227
+ },
228
+ "50284": {
229
+ "content": " ",
230
+ "lstrip": false,
231
+ "normalized": true,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": false
235
+ },
236
+ "50285": {
237
+ "content": " ",
238
+ "lstrip": false,
239
+ "normalized": true,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": false
243
+ },
244
+ "50286": {
245
+ "content": " ",
246
+ "lstrip": false,
247
+ "normalized": true,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": false
251
+ },
252
+ "50287": {
253
+ "content": "\t\t\t\t\t\t\t\t\t",
254
+ "lstrip": false,
255
+ "normalized": true,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": false
259
+ },
260
+ "50288": {
261
+ "content": "\t\t\t\t\t\t\t\t",
262
+ "lstrip": false,
263
+ "normalized": true,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": false
267
+ },
268
+ "50289": {
269
+ "content": "\t\t\t\t\t\t\t",
270
+ "lstrip": false,
271
+ "normalized": true,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": false
275
+ },
276
+ "50290": {
277
+ "content": "\t\t\t\t\t\t",
278
+ "lstrip": false,
279
+ "normalized": true,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": false
283
+ },
284
+ "50291": {
285
+ "content": "\t\t\t\t\t",
286
+ "lstrip": false,
287
+ "normalized": true,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": false
291
+ },
292
+ "50292": {
293
+ "content": "\t\t\t\t",
294
+ "lstrip": false,
295
+ "normalized": true,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": false
299
+ },
300
+ "50293": {
301
+ "content": "\t\t\t",
302
+ "lstrip": false,
303
+ "normalized": true,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": false
307
+ },
308
+ "50294": {
309
+ "content": "\t\t",
310
+ "lstrip": false,
311
+ "normalized": true,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": false
315
+ }
316
+ },
317
+ "bos_token": "<|endoftext|>",
318
+ "clean_up_tokenization_spaces": false,
319
+ "eos_token": "<|endoftext|>",
320
+ "max_length": 512,
321
+ "model_max_length": 2048,
322
+ "pad_to_multiple_of": null,
323
+ "pad_token": "<|endoftext|>",
324
+ "pad_token_type_id": 0,
325
+ "padding_side": "right",
326
+ "return_token_type_ids": false,
327
+ "stride": 0,
328
+ "tokenizer_class": "CodeGenTokenizer",
329
+ "truncation_side": "right",
330
+ "truncation_strategy": "longest_first",
331
+ "unk_token": "<|endoftext|>"
332
+ }
flutter_codegen_model/checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,1083 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.03930611603165453,
5
+ "eval_steps": 500,
6
+ "global_step": 1500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0002620407735443635,
13
+ "grad_norm": 1.2820378541946411,
14
+ "learning_rate": 4.999563265377426e-05,
15
+ "loss": 0.6609,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.000524081547088727,
20
+ "grad_norm": 1.3106062412261963,
21
+ "learning_rate": 4.999126530754852e-05,
22
+ "loss": 0.8066,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.0007861223206330905,
27
+ "grad_norm": 1.6985974311828613,
28
+ "learning_rate": 4.9986897961322784e-05,
29
+ "loss": 0.6863,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.001048163094177454,
34
+ "grad_norm": 1.2552741765975952,
35
+ "learning_rate": 4.9982530615097044e-05,
36
+ "loss": 0.5674,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.0013102038677218176,
41
+ "grad_norm": 1.3928155899047852,
42
+ "learning_rate": 4.9978163268871304e-05,
43
+ "loss": 0.5992,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.001572244641266181,
48
+ "grad_norm": 1.5794411897659302,
49
+ "learning_rate": 4.9973795922645565e-05,
50
+ "loss": 0.7939,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.0018342854148105446,
55
+ "grad_norm": 1.4024512767791748,
56
+ "learning_rate": 4.9969428576419825e-05,
57
+ "loss": 0.6178,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.002096326188354908,
62
+ "grad_norm": 1.2232664823532104,
63
+ "learning_rate": 4.9965061230194085e-05,
64
+ "loss": 0.7544,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.0023583669618992714,
69
+ "grad_norm": 1.5368870496749878,
70
+ "learning_rate": 4.9960693883968346e-05,
71
+ "loss": 0.6645,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.002620407735443635,
76
+ "grad_norm": 1.6177372932434082,
77
+ "learning_rate": 4.9956326537742606e-05,
78
+ "loss": 0.6329,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.0028824485089879986,
83
+ "grad_norm": 1.3803173303604126,
84
+ "learning_rate": 4.995195919151687e-05,
85
+ "loss": 0.7114,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.003144489282532362,
90
+ "grad_norm": 1.5191670656204224,
91
+ "learning_rate": 4.9947591845291134e-05,
92
+ "loss": 0.8717,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.0034065300560767254,
97
+ "grad_norm": 1.2967548370361328,
98
+ "learning_rate": 4.994322449906539e-05,
99
+ "loss": 0.7618,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.003668570829621089,
104
+ "grad_norm": 1.8742738962173462,
105
+ "learning_rate": 4.9938857152839655e-05,
106
+ "loss": 0.5896,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.003930611603165452,
111
+ "grad_norm": 1.697966456413269,
112
+ "learning_rate": 4.993448980661391e-05,
113
+ "loss": 0.7493,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.004192652376709816,
118
+ "grad_norm": 1.5282775163650513,
119
+ "learning_rate": 4.9930122460388175e-05,
120
+ "loss": 0.6359,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.00445469315025418,
125
+ "grad_norm": 1.2776225805282593,
126
+ "learning_rate": 4.992575511416243e-05,
127
+ "loss": 0.7677,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.004716733923798543,
132
+ "grad_norm": 1.2036848068237305,
133
+ "learning_rate": 4.9921387767936696e-05,
134
+ "loss": 0.6234,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.0049787746973429066,
139
+ "grad_norm": 1.1325912475585938,
140
+ "learning_rate": 4.991702042171095e-05,
141
+ "loss": 0.624,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.00524081547088727,
146
+ "grad_norm": 1.3846409320831299,
147
+ "learning_rate": 4.991265307548522e-05,
148
+ "loss": 0.5979,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.005502856244431633,
153
+ "grad_norm": 1.9792439937591553,
154
+ "learning_rate": 4.990828572925948e-05,
155
+ "loss": 0.7897,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.005764897017975997,
160
+ "grad_norm": 1.8546253442764282,
161
+ "learning_rate": 4.990391838303374e-05,
162
+ "loss": 0.7035,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.006026937791520361,
167
+ "grad_norm": 1.5434975624084473,
168
+ "learning_rate": 4.9899551036808e-05,
169
+ "loss": 0.7092,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.006288978565064724,
174
+ "grad_norm": 1.0328209400177002,
175
+ "learning_rate": 4.989518369058226e-05,
176
+ "loss": 0.6239,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.006551019338609088,
181
+ "grad_norm": 0.9389006495475769,
182
+ "learning_rate": 4.989081634435652e-05,
183
+ "loss": 0.6957,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.006813060112153451,
188
+ "grad_norm": 1.0274962186813354,
189
+ "learning_rate": 4.988644899813078e-05,
190
+ "loss": 0.5302,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.0070751008856978145,
195
+ "grad_norm": 1.2626285552978516,
196
+ "learning_rate": 4.988208165190504e-05,
197
+ "loss": 0.6541,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.007337141659242178,
202
+ "grad_norm": 1.4558316469192505,
203
+ "learning_rate": 4.98777143056793e-05,
204
+ "loss": 0.7284,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.007599182432786541,
209
+ "grad_norm": 1.3997328281402588,
210
+ "learning_rate": 4.987334695945356e-05,
211
+ "loss": 0.6888,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 0.007861223206330904,
216
+ "grad_norm": 1.7742432355880737,
217
+ "learning_rate": 4.986897961322782e-05,
218
+ "loss": 0.6869,
219
+ "step": 300
220
+ },
221
+ {
222
+ "epoch": 0.008123263979875268,
223
+ "grad_norm": 1.4925923347473145,
224
+ "learning_rate": 4.986461226700208e-05,
225
+ "loss": 0.731,
226
+ "step": 310
227
+ },
228
+ {
229
+ "epoch": 0.008385304753419632,
230
+ "grad_norm": 1.5941082239151,
231
+ "learning_rate": 4.986024492077634e-05,
232
+ "loss": 0.7149,
233
+ "step": 320
234
+ },
235
+ {
236
+ "epoch": 0.008647345526963996,
237
+ "grad_norm": 1.507450819015503,
238
+ "learning_rate": 4.98558775745506e-05,
239
+ "loss": 0.6443,
240
+ "step": 330
241
+ },
242
+ {
243
+ "epoch": 0.00890938630050836,
244
+ "grad_norm": 0.9866214990615845,
245
+ "learning_rate": 4.985151022832487e-05,
246
+ "loss": 0.6003,
247
+ "step": 340
248
+ },
249
+ {
250
+ "epoch": 0.009171427074052723,
251
+ "grad_norm": 0.9682250022888184,
252
+ "learning_rate": 4.984714288209912e-05,
253
+ "loss": 0.6602,
254
+ "step": 350
255
+ },
256
+ {
257
+ "epoch": 0.009433467847597085,
258
+ "grad_norm": 1.7567181587219238,
259
+ "learning_rate": 4.984277553587338e-05,
260
+ "loss": 0.5647,
261
+ "step": 360
262
+ },
263
+ {
264
+ "epoch": 0.00969550862114145,
265
+ "grad_norm": 1.4297257661819458,
266
+ "learning_rate": 4.9838408189647643e-05,
267
+ "loss": 0.6628,
268
+ "step": 370
269
+ },
270
+ {
271
+ "epoch": 0.009957549394685813,
272
+ "grad_norm": 1.515763521194458,
273
+ "learning_rate": 4.9834040843421904e-05,
274
+ "loss": 0.568,
275
+ "step": 380
276
+ },
277
+ {
278
+ "epoch": 0.010219590168230177,
279
+ "grad_norm": 1.1806342601776123,
280
+ "learning_rate": 4.9829673497196164e-05,
281
+ "loss": 0.6729,
282
+ "step": 390
283
+ },
284
+ {
285
+ "epoch": 0.01048163094177454,
286
+ "grad_norm": 1.1328020095825195,
287
+ "learning_rate": 4.9825306150970425e-05,
288
+ "loss": 0.6963,
289
+ "step": 400
290
+ },
291
+ {
292
+ "epoch": 0.010743671715318903,
293
+ "grad_norm": 0.7740004658699036,
294
+ "learning_rate": 4.9820938804744685e-05,
295
+ "loss": 0.6654,
296
+ "step": 410
297
+ },
298
+ {
299
+ "epoch": 0.011005712488863267,
300
+ "grad_norm": 0.9519413113594055,
301
+ "learning_rate": 4.9816571458518945e-05,
302
+ "loss": 0.6487,
303
+ "step": 420
304
+ },
305
+ {
306
+ "epoch": 0.01126775326240763,
307
+ "grad_norm": 0.8964847922325134,
308
+ "learning_rate": 4.981220411229321e-05,
309
+ "loss": 0.5667,
310
+ "step": 430
311
+ },
312
+ {
313
+ "epoch": 0.011529794035951994,
314
+ "grad_norm": 1.428072452545166,
315
+ "learning_rate": 4.9807836766067466e-05,
316
+ "loss": 0.8164,
317
+ "step": 440
318
+ },
319
+ {
320
+ "epoch": 0.011791834809496358,
321
+ "grad_norm": 1.4375147819519043,
322
+ "learning_rate": 4.980346941984173e-05,
323
+ "loss": 0.5476,
324
+ "step": 450
325
+ },
326
+ {
327
+ "epoch": 0.012053875583040722,
328
+ "grad_norm": 1.1702146530151367,
329
+ "learning_rate": 4.979910207361599e-05,
330
+ "loss": 0.7342,
331
+ "step": 460
332
+ },
333
+ {
334
+ "epoch": 0.012315916356585084,
335
+ "grad_norm": 1.2703320980072021,
336
+ "learning_rate": 4.9794734727390254e-05,
337
+ "loss": 0.5767,
338
+ "step": 470
339
+ },
340
+ {
341
+ "epoch": 0.012577957130129448,
342
+ "grad_norm": 1.2520267963409424,
343
+ "learning_rate": 4.979036738116451e-05,
344
+ "loss": 0.5969,
345
+ "step": 480
346
+ },
347
+ {
348
+ "epoch": 0.012839997903673812,
349
+ "grad_norm": 1.413979172706604,
350
+ "learning_rate": 4.9786000034938775e-05,
351
+ "loss": 0.7011,
352
+ "step": 490
353
+ },
354
+ {
355
+ "epoch": 0.013102038677218176,
356
+ "grad_norm": 1.3918565511703491,
357
+ "learning_rate": 4.978163268871303e-05,
358
+ "loss": 0.6041,
359
+ "step": 500
360
+ },
361
+ {
362
+ "epoch": 0.01336407945076254,
363
+ "grad_norm": 0.9175894260406494,
364
+ "learning_rate": 4.9777265342487296e-05,
365
+ "loss": 0.5052,
366
+ "step": 510
367
+ },
368
+ {
369
+ "epoch": 0.013626120224306901,
370
+ "grad_norm": 1.296505331993103,
371
+ "learning_rate": 4.9772897996261556e-05,
372
+ "loss": 0.6076,
373
+ "step": 520
374
+ },
375
+ {
376
+ "epoch": 0.013888160997851265,
377
+ "grad_norm": 1.2490183115005493,
378
+ "learning_rate": 4.9768530650035816e-05,
379
+ "loss": 0.6287,
380
+ "step": 530
381
+ },
382
+ {
383
+ "epoch": 0.014150201771395629,
384
+ "grad_norm": 1.398285984992981,
385
+ "learning_rate": 4.976416330381008e-05,
386
+ "loss": 0.7905,
387
+ "step": 540
388
+ },
389
+ {
390
+ "epoch": 0.014412242544939993,
391
+ "grad_norm": 1.3094829320907593,
392
+ "learning_rate": 4.975979595758434e-05,
393
+ "loss": 0.5133,
394
+ "step": 550
395
+ },
396
+ {
397
+ "epoch": 0.014674283318484357,
398
+ "grad_norm": 1.7128199338912964,
399
+ "learning_rate": 4.97554286113586e-05,
400
+ "loss": 0.824,
401
+ "step": 560
402
+ },
403
+ {
404
+ "epoch": 0.014936324092028719,
405
+ "grad_norm": 1.1319103240966797,
406
+ "learning_rate": 4.975106126513286e-05,
407
+ "loss": 0.5893,
408
+ "step": 570
409
+ },
410
+ {
411
+ "epoch": 0.015198364865573083,
412
+ "grad_norm": 1.6176029443740845,
413
+ "learning_rate": 4.974669391890712e-05,
414
+ "loss": 0.581,
415
+ "step": 580
416
+ },
417
+ {
418
+ "epoch": 0.015460405639117446,
419
+ "grad_norm": 1.590836524963379,
420
+ "learning_rate": 4.974232657268138e-05,
421
+ "loss": 0.5814,
422
+ "step": 590
423
+ },
424
+ {
425
+ "epoch": 0.01572244641266181,
426
+ "grad_norm": 1.6922227144241333,
427
+ "learning_rate": 4.973795922645564e-05,
428
+ "loss": 0.5917,
429
+ "step": 600
430
+ },
431
+ {
432
+ "epoch": 0.015984487186206174,
433
+ "grad_norm": 1.3479337692260742,
434
+ "learning_rate": 4.97335918802299e-05,
435
+ "loss": 0.7356,
436
+ "step": 610
437
+ },
438
+ {
439
+ "epoch": 0.016246527959750536,
440
+ "grad_norm": 2.2019124031066895,
441
+ "learning_rate": 4.972922453400416e-05,
442
+ "loss": 0.6197,
443
+ "step": 620
444
+ },
445
+ {
446
+ "epoch": 0.016508568733294902,
447
+ "grad_norm": 1.6211423873901367,
448
+ "learning_rate": 4.972485718777842e-05,
449
+ "loss": 0.6299,
450
+ "step": 630
451
+ },
452
+ {
453
+ "epoch": 0.016770609506839264,
454
+ "grad_norm": 1.157416582107544,
455
+ "learning_rate": 4.972048984155268e-05,
456
+ "loss": 0.6343,
457
+ "step": 640
458
+ },
459
+ {
460
+ "epoch": 0.01703265028038363,
461
+ "grad_norm": 1.2576712369918823,
462
+ "learning_rate": 4.971612249532695e-05,
463
+ "loss": 0.6126,
464
+ "step": 650
465
+ },
466
+ {
467
+ "epoch": 0.01729469105392799,
468
+ "grad_norm": 1.3852715492248535,
469
+ "learning_rate": 4.97117551491012e-05,
470
+ "loss": 0.6352,
471
+ "step": 660
472
+ },
473
+ {
474
+ "epoch": 0.017556731827472354,
475
+ "grad_norm": 1.0178048610687256,
476
+ "learning_rate": 4.970738780287547e-05,
477
+ "loss": 0.5923,
478
+ "step": 670
479
+ },
480
+ {
481
+ "epoch": 0.01781877260101672,
482
+ "grad_norm": 0.8760583996772766,
483
+ "learning_rate": 4.970302045664972e-05,
484
+ "loss": 0.6158,
485
+ "step": 680
486
+ },
487
+ {
488
+ "epoch": 0.01808081337456108,
489
+ "grad_norm": 0.8956984281539917,
490
+ "learning_rate": 4.969865311042398e-05,
491
+ "loss": 0.5746,
492
+ "step": 690
493
+ },
494
+ {
495
+ "epoch": 0.018342854148105447,
496
+ "grad_norm": 1.1126501560211182,
497
+ "learning_rate": 4.969428576419824e-05,
498
+ "loss": 0.6254,
499
+ "step": 700
500
+ },
501
+ {
502
+ "epoch": 0.01860489492164981,
503
+ "grad_norm": 1.168455958366394,
504
+ "learning_rate": 4.96899184179725e-05,
505
+ "loss": 0.652,
506
+ "step": 710
507
+ },
508
+ {
509
+ "epoch": 0.01886693569519417,
510
+ "grad_norm": 1.3628567457199097,
511
+ "learning_rate": 4.9685551071746764e-05,
512
+ "loss": 0.5789,
513
+ "step": 720
514
+ },
515
+ {
516
+ "epoch": 0.019128976468738536,
517
+ "grad_norm": 1.1971865892410278,
518
+ "learning_rate": 4.9681183725521024e-05,
519
+ "loss": 0.6408,
520
+ "step": 730
521
+ },
522
+ {
523
+ "epoch": 0.0193910172422829,
524
+ "grad_norm": 1.1916868686676025,
525
+ "learning_rate": 4.9676816379295285e-05,
526
+ "loss": 0.6461,
527
+ "step": 740
528
+ },
529
+ {
530
+ "epoch": 0.019653058015827264,
531
+ "grad_norm": 1.1797837018966675,
532
+ "learning_rate": 4.9672449033069545e-05,
533
+ "loss": 0.5843,
534
+ "step": 750
535
+ },
536
+ {
537
+ "epoch": 0.019915098789371626,
538
+ "grad_norm": 0.7941935658454895,
539
+ "learning_rate": 4.966808168684381e-05,
540
+ "loss": 0.6165,
541
+ "step": 760
542
+ },
543
+ {
544
+ "epoch": 0.02017713956291599,
545
+ "grad_norm": 1.4876329898834229,
546
+ "learning_rate": 4.9663714340618066e-05,
547
+ "loss": 0.6347,
548
+ "step": 770
549
+ },
550
+ {
551
+ "epoch": 0.020439180336460354,
552
+ "grad_norm": 1.1482038497924805,
553
+ "learning_rate": 4.965934699439233e-05,
554
+ "loss": 0.5662,
555
+ "step": 780
556
+ },
557
+ {
558
+ "epoch": 0.020701221110004716,
559
+ "grad_norm": 1.3942419290542603,
560
+ "learning_rate": 4.9654979648166586e-05,
561
+ "loss": 0.6189,
562
+ "step": 790
563
+ },
564
+ {
565
+ "epoch": 0.02096326188354908,
566
+ "grad_norm": 0.8826277256011963,
567
+ "learning_rate": 4.9650612301940854e-05,
568
+ "loss": 0.6801,
569
+ "step": 800
570
+ },
571
+ {
572
+ "epoch": 0.021225302657093444,
573
+ "grad_norm": 1.3729712963104248,
574
+ "learning_rate": 4.964624495571511e-05,
575
+ "loss": 0.5789,
576
+ "step": 810
577
+ },
578
+ {
579
+ "epoch": 0.021487343430637806,
580
+ "grad_norm": 0.747199296951294,
581
+ "learning_rate": 4.9641877609489374e-05,
582
+ "loss": 0.651,
583
+ "step": 820
584
+ },
585
+ {
586
+ "epoch": 0.02174938420418217,
587
+ "grad_norm": 0.7911145091056824,
588
+ "learning_rate": 4.963751026326363e-05,
589
+ "loss": 0.6834,
590
+ "step": 830
591
+ },
592
+ {
593
+ "epoch": 0.022011424977726533,
594
+ "grad_norm": 1.1725844144821167,
595
+ "learning_rate": 4.9633142917037895e-05,
596
+ "loss": 0.6687,
597
+ "step": 840
598
+ },
599
+ {
600
+ "epoch": 0.0222734657512709,
601
+ "grad_norm": 1.2759829759597778,
602
+ "learning_rate": 4.9628775570812156e-05,
603
+ "loss": 0.6612,
604
+ "step": 850
605
+ },
606
+ {
607
+ "epoch": 0.02253550652481526,
608
+ "grad_norm": 1.497684359550476,
609
+ "learning_rate": 4.9624408224586416e-05,
610
+ "loss": 0.686,
611
+ "step": 860
612
+ },
613
+ {
614
+ "epoch": 0.022797547298359623,
615
+ "grad_norm": 1.4431102275848389,
616
+ "learning_rate": 4.9620040878360676e-05,
617
+ "loss": 0.5838,
618
+ "step": 870
619
+ },
620
+ {
621
+ "epoch": 0.02305958807190399,
622
+ "grad_norm": 0.8864196538925171,
623
+ "learning_rate": 4.961567353213494e-05,
624
+ "loss": 0.6076,
625
+ "step": 880
626
+ },
627
+ {
628
+ "epoch": 0.02332162884544835,
629
+ "grad_norm": 1.4421597719192505,
630
+ "learning_rate": 4.96113061859092e-05,
631
+ "loss": 0.6669,
632
+ "step": 890
633
+ },
634
+ {
635
+ "epoch": 0.023583669618992716,
636
+ "grad_norm": 1.541601300239563,
637
+ "learning_rate": 4.960693883968346e-05,
638
+ "loss": 0.477,
639
+ "step": 900
640
+ },
641
+ {
642
+ "epoch": 0.02384571039253708,
643
+ "grad_norm": 1.0725853443145752,
644
+ "learning_rate": 4.960257149345772e-05,
645
+ "loss": 0.5399,
646
+ "step": 910
647
+ },
648
+ {
649
+ "epoch": 0.024107751166081444,
650
+ "grad_norm": 1.4579834938049316,
651
+ "learning_rate": 4.959820414723198e-05,
652
+ "loss": 0.636,
653
+ "step": 920
654
+ },
655
+ {
656
+ "epoch": 0.024369791939625806,
657
+ "grad_norm": 1.1018449068069458,
658
+ "learning_rate": 4.959383680100624e-05,
659
+ "loss": 0.7731,
660
+ "step": 930
661
+ },
662
+ {
663
+ "epoch": 0.024631832713170168,
664
+ "grad_norm": 1.3531861305236816,
665
+ "learning_rate": 4.95894694547805e-05,
666
+ "loss": 0.7017,
667
+ "step": 940
668
+ },
669
+ {
670
+ "epoch": 0.024893873486714534,
671
+ "grad_norm": 1.1225773096084595,
672
+ "learning_rate": 4.958510210855476e-05,
673
+ "loss": 0.7228,
674
+ "step": 950
675
+ },
676
+ {
677
+ "epoch": 0.025155914260258896,
678
+ "grad_norm": 0.6442508697509766,
679
+ "learning_rate": 4.958073476232902e-05,
680
+ "loss": 0.4421,
681
+ "step": 960
682
+ },
683
+ {
684
+ "epoch": 0.02541795503380326,
685
+ "grad_norm": 1.1338638067245483,
686
+ "learning_rate": 4.957636741610328e-05,
687
+ "loss": 0.6533,
688
+ "step": 970
689
+ },
690
+ {
691
+ "epoch": 0.025679995807347623,
692
+ "grad_norm": 1.0796573162078857,
693
+ "learning_rate": 4.957200006987755e-05,
694
+ "loss": 0.6407,
695
+ "step": 980
696
+ },
697
+ {
698
+ "epoch": 0.025942036580891985,
699
+ "grad_norm": 1.001578450202942,
700
+ "learning_rate": 4.95676327236518e-05,
701
+ "loss": 0.652,
702
+ "step": 990
703
+ },
704
+ {
705
+ "epoch": 0.02620407735443635,
706
+ "grad_norm": 1.521545648574829,
707
+ "learning_rate": 4.956326537742607e-05,
708
+ "loss": 0.4812,
709
+ "step": 1000
710
+ },
711
+ {
712
+ "epoch": 0.026466118127980713,
713
+ "grad_norm": 1.1855580806732178,
714
+ "learning_rate": 4.955889803120032e-05,
715
+ "loss": 0.5927,
716
+ "step": 1010
717
+ },
718
+ {
719
+ "epoch": 0.02672815890152508,
720
+ "grad_norm": 1.3463046550750732,
721
+ "learning_rate": 4.955453068497459e-05,
722
+ "loss": 0.7304,
723
+ "step": 1020
724
+ },
725
+ {
726
+ "epoch": 0.02699019967506944,
727
+ "grad_norm": 1.0500322580337524,
728
+ "learning_rate": 4.955016333874884e-05,
729
+ "loss": 0.5409,
730
+ "step": 1030
731
+ },
732
+ {
733
+ "epoch": 0.027252240448613803,
734
+ "grad_norm": 1.4060779809951782,
735
+ "learning_rate": 4.95457959925231e-05,
736
+ "loss": 0.7348,
737
+ "step": 1040
738
+ },
739
+ {
740
+ "epoch": 0.02751428122215817,
741
+ "grad_norm": 1.433759331703186,
742
+ "learning_rate": 4.954142864629736e-05,
743
+ "loss": 0.6079,
744
+ "step": 1050
745
+ },
746
+ {
747
+ "epoch": 0.02777632199570253,
748
+ "grad_norm": 2.5895800590515137,
749
+ "learning_rate": 4.9537061300071624e-05,
750
+ "loss": 0.5789,
751
+ "step": 1060
752
+ },
753
+ {
754
+ "epoch": 0.028038362769246896,
755
+ "grad_norm": 1.1626940965652466,
756
+ "learning_rate": 4.953269395384589e-05,
757
+ "loss": 0.5478,
758
+ "step": 1070
759
+ },
760
+ {
761
+ "epoch": 0.028300403542791258,
762
+ "grad_norm": 1.6183115243911743,
763
+ "learning_rate": 4.9528326607620144e-05,
764
+ "loss": 0.6782,
765
+ "step": 1080
766
+ },
767
+ {
768
+ "epoch": 0.02856244431633562,
769
+ "grad_norm": 1.5321258306503296,
770
+ "learning_rate": 4.952395926139441e-05,
771
+ "loss": 0.5878,
772
+ "step": 1090
773
+ },
774
+ {
775
+ "epoch": 0.028824485089879986,
776
+ "grad_norm": 0.9456602334976196,
777
+ "learning_rate": 4.9519591915168665e-05,
778
+ "loss": 0.6277,
779
+ "step": 1100
780
+ },
781
+ {
782
+ "epoch": 0.029086525863424348,
783
+ "grad_norm": 1.0988554954528809,
784
+ "learning_rate": 4.951522456894293e-05,
785
+ "loss": 0.5019,
786
+ "step": 1110
787
+ },
788
+ {
789
+ "epoch": 0.029348566636968713,
790
+ "grad_norm": 1.053934931755066,
791
+ "learning_rate": 4.9510857222717186e-05,
792
+ "loss": 0.6578,
793
+ "step": 1120
794
+ },
795
+ {
796
+ "epoch": 0.029610607410513076,
797
+ "grad_norm": 1.292533040046692,
798
+ "learning_rate": 4.950648987649145e-05,
799
+ "loss": 0.6115,
800
+ "step": 1130
801
+ },
802
+ {
803
+ "epoch": 0.029872648184057438,
804
+ "grad_norm": 0.8503355383872986,
805
+ "learning_rate": 4.950212253026571e-05,
806
+ "loss": 0.56,
807
+ "step": 1140
808
+ },
809
+ {
810
+ "epoch": 0.030134688957601803,
811
+ "grad_norm": 1.5698297023773193,
812
+ "learning_rate": 4.9497755184039974e-05,
813
+ "loss": 0.6208,
814
+ "step": 1150
815
+ },
816
+ {
817
+ "epoch": 0.030396729731146165,
818
+ "grad_norm": 1.3464306592941284,
819
+ "learning_rate": 4.9493387837814234e-05,
820
+ "loss": 0.5812,
821
+ "step": 1160
822
+ },
823
+ {
824
+ "epoch": 0.03065877050469053,
825
+ "grad_norm": 0.9388158321380615,
826
+ "learning_rate": 4.9489020491588495e-05,
827
+ "loss": 0.5684,
828
+ "step": 1170
829
+ },
830
+ {
831
+ "epoch": 0.030920811278234893,
832
+ "grad_norm": 1.271061897277832,
833
+ "learning_rate": 4.9484653145362755e-05,
834
+ "loss": 0.5456,
835
+ "step": 1180
836
+ },
837
+ {
838
+ "epoch": 0.03118285205177926,
839
+ "grad_norm": 0.8666424751281738,
840
+ "learning_rate": 4.9480285799137015e-05,
841
+ "loss": 0.5418,
842
+ "step": 1190
843
+ },
844
+ {
845
+ "epoch": 0.03144489282532362,
846
+ "grad_norm": 1.2482844591140747,
847
+ "learning_rate": 4.9475918452911276e-05,
848
+ "loss": 0.7439,
849
+ "step": 1200
850
+ },
851
+ {
852
+ "epoch": 0.03170693359886798,
853
+ "grad_norm": 1.226784110069275,
854
+ "learning_rate": 4.9471551106685536e-05,
855
+ "loss": 0.5813,
856
+ "step": 1210
857
+ },
858
+ {
859
+ "epoch": 0.03196897437241235,
860
+ "grad_norm": 1.1704344749450684,
861
+ "learning_rate": 4.9467183760459797e-05,
862
+ "loss": 0.757,
863
+ "step": 1220
864
+ },
865
+ {
866
+ "epoch": 0.032231015145956714,
867
+ "grad_norm": 1.0429288148880005,
868
+ "learning_rate": 4.946281641423406e-05,
869
+ "loss": 0.5162,
870
+ "step": 1230
871
+ },
872
+ {
873
+ "epoch": 0.03249305591950107,
874
+ "grad_norm": 1.3331559896469116,
875
+ "learning_rate": 4.945844906800832e-05,
876
+ "loss": 0.6224,
877
+ "step": 1240
878
+ },
879
+ {
880
+ "epoch": 0.03275509669304544,
881
+ "grad_norm": 1.2090318202972412,
882
+ "learning_rate": 4.945408172178258e-05,
883
+ "loss": 0.6473,
884
+ "step": 1250
885
+ },
886
+ {
887
+ "epoch": 0.033017137466589803,
888
+ "grad_norm": 1.3969329595565796,
889
+ "learning_rate": 4.944971437555684e-05,
890
+ "loss": 0.5806,
891
+ "step": 1260
892
+ },
893
+ {
894
+ "epoch": 0.03327917824013416,
895
+ "grad_norm": 1.6032638549804688,
896
+ "learning_rate": 4.94453470293311e-05,
897
+ "loss": 0.5766,
898
+ "step": 1270
899
+ },
900
+ {
901
+ "epoch": 0.03354121901367853,
902
+ "grad_norm": 1.0155426263809204,
903
+ "learning_rate": 4.944097968310536e-05,
904
+ "loss": 0.5711,
905
+ "step": 1280
906
+ },
907
+ {
908
+ "epoch": 0.03380325978722289,
909
+ "grad_norm": 1.2512564659118652,
910
+ "learning_rate": 4.9436612336879626e-05,
911
+ "loss": 0.6783,
912
+ "step": 1290
913
+ },
914
+ {
915
+ "epoch": 0.03406530056076726,
916
+ "grad_norm": 1.0756443738937378,
917
+ "learning_rate": 4.943224499065388e-05,
918
+ "loss": 0.6255,
919
+ "step": 1300
920
+ },
921
+ {
922
+ "epoch": 0.03432734133431162,
923
+ "grad_norm": 1.2002949714660645,
924
+ "learning_rate": 4.942787764442815e-05,
925
+ "loss": 0.6057,
926
+ "step": 1310
927
+ },
928
+ {
929
+ "epoch": 0.03458938210785598,
930
+ "grad_norm": 0.6843838691711426,
931
+ "learning_rate": 4.94235102982024e-05,
932
+ "loss": 0.5411,
933
+ "step": 1320
934
+ },
935
+ {
936
+ "epoch": 0.03485142288140035,
937
+ "grad_norm": 0.8038992285728455,
938
+ "learning_rate": 4.941914295197667e-05,
939
+ "loss": 0.6097,
940
+ "step": 1330
941
+ },
942
+ {
943
+ "epoch": 0.03511346365494471,
944
+ "grad_norm": 1.2226810455322266,
945
+ "learning_rate": 4.941477560575092e-05,
946
+ "loss": 0.5731,
947
+ "step": 1340
948
+ },
949
+ {
950
+ "epoch": 0.03537550442848907,
951
+ "grad_norm": 1.236194372177124,
952
+ "learning_rate": 4.941040825952519e-05,
953
+ "loss": 0.5677,
954
+ "step": 1350
955
+ },
956
+ {
957
+ "epoch": 0.03563754520203344,
958
+ "grad_norm": 1.2129302024841309,
959
+ "learning_rate": 4.940604091329944e-05,
960
+ "loss": 0.6031,
961
+ "step": 1360
962
+ },
963
+ {
964
+ "epoch": 0.0358995859755778,
965
+ "grad_norm": 1.5371068716049194,
966
+ "learning_rate": 4.94016735670737e-05,
967
+ "loss": 0.5957,
968
+ "step": 1370
969
+ },
970
+ {
971
+ "epoch": 0.03616162674912216,
972
+ "grad_norm": 1.6025753021240234,
973
+ "learning_rate": 4.939730622084797e-05,
974
+ "loss": 0.6261,
975
+ "step": 1380
976
+ },
977
+ {
978
+ "epoch": 0.03642366752266653,
979
+ "grad_norm": 1.3252272605895996,
980
+ "learning_rate": 4.939293887462222e-05,
981
+ "loss": 0.626,
982
+ "step": 1390
983
+ },
984
+ {
985
+ "epoch": 0.036685708296210894,
986
+ "grad_norm": 1.0813733339309692,
987
+ "learning_rate": 4.938857152839649e-05,
988
+ "loss": 0.6659,
989
+ "step": 1400
990
+ },
991
+ {
992
+ "epoch": 0.03694774906975525,
993
+ "grad_norm": 1.280362844467163,
994
+ "learning_rate": 4.9384204182170744e-05,
995
+ "loss": 0.5828,
996
+ "step": 1410
997
+ },
998
+ {
999
+ "epoch": 0.03720978984329962,
1000
+ "grad_norm": 0.7822287082672119,
1001
+ "learning_rate": 4.937983683594501e-05,
1002
+ "loss": 0.4939,
1003
+ "step": 1420
1004
+ },
1005
+ {
1006
+ "epoch": 0.03747183061684398,
1007
+ "grad_norm": 1.0170814990997314,
1008
+ "learning_rate": 4.9375469489719265e-05,
1009
+ "loss": 0.5846,
1010
+ "step": 1430
1011
+ },
1012
+ {
1013
+ "epoch": 0.03773387139038834,
1014
+ "grad_norm": 1.4380531311035156,
1015
+ "learning_rate": 4.937110214349353e-05,
1016
+ "loss": 0.5203,
1017
+ "step": 1440
1018
+ },
1019
+ {
1020
+ "epoch": 0.03799591216393271,
1021
+ "grad_norm": 0.9485931396484375,
1022
+ "learning_rate": 4.9366734797267785e-05,
1023
+ "loss": 0.4703,
1024
+ "step": 1450
1025
+ },
1026
+ {
1027
+ "epoch": 0.03825795293747707,
1028
+ "grad_norm": 1.4566253423690796,
1029
+ "learning_rate": 4.936236745104205e-05,
1030
+ "loss": 0.6879,
1031
+ "step": 1460
1032
+ },
1033
+ {
1034
+ "epoch": 0.03851999371102143,
1035
+ "grad_norm": 1.3309845924377441,
1036
+ "learning_rate": 4.935800010481631e-05,
1037
+ "loss": 0.6359,
1038
+ "step": 1470
1039
+ },
1040
+ {
1041
+ "epoch": 0.0387820344845658,
1042
+ "grad_norm": 0.8272371292114258,
1043
+ "learning_rate": 4.9353632758590573e-05,
1044
+ "loss": 0.5521,
1045
+ "step": 1480
1046
+ },
1047
+ {
1048
+ "epoch": 0.03904407525811016,
1049
+ "grad_norm": 1.122835397720337,
1050
+ "learning_rate": 4.9349265412364834e-05,
1051
+ "loss": 0.5871,
1052
+ "step": 1490
1053
+ },
1054
+ {
1055
+ "epoch": 0.03930611603165453,
1056
+ "grad_norm": 1.701686978340149,
1057
+ "learning_rate": 4.9344898066139094e-05,
1058
+ "loss": 0.5549,
1059
+ "step": 1500
1060
+ }
1061
+ ],
1062
+ "logging_steps": 10,
1063
+ "max_steps": 114486,
1064
+ "num_input_tokens_seen": 0,
1065
+ "num_train_epochs": 3,
1066
+ "save_steps": 500,
1067
+ "stateful_callbacks": {
1068
+ "TrainerControl": {
1069
+ "args": {
1070
+ "should_epoch_stop": false,
1071
+ "should_evaluate": false,
1072
+ "should_log": false,
1073
+ "should_save": true,
1074
+ "should_training_stop": false
1075
+ },
1076
+ "attributes": {}
1077
+ }
1078
+ },
1079
+ "total_flos": 5608556199936000.0,
1080
+ "train_batch_size": 4,
1081
+ "trial_name": null,
1082
+ "trial_params": null
1083
+ }
flutter_codegen_model/checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da53cc9a018e467dc692093da1b9a59e8d739bedb689402e677cfe0810541434
3
+ size 5240
flutter_codegen_model/checkpoint-1500/vocab.json ADDED
The diff for this file is too large to render. See raw diff