NicholasCorrado committed on
Commit
625a208
·
verified ·
1 Parent(s): 52bac04

Model save

Browse files
README.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ base_model: alignment-handbook/zephyr-7b-sft-full
4
+ tags:
5
+ - trl
6
+ - dpo
7
+ - generated_from_trainer
8
+ model-index:
9
+ - name: rlced-conifer-tulu-2-7b-dpo-full
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # rlced-conifer-tulu-2-7b-dpo-full
17
+
18
+ This model is a fine-tuned version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) on an unknown dataset.
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 5e-07
38
+ - train_batch_size: 8
39
+ - eval_batch_size: 8
40
+ - seed: 42
41
+ - distributed_type: multi-GPU
42
+ - num_devices: 8
43
+ - gradient_accumulation_steps: 4
44
+ - total_train_batch_size: 256
45
+ - total_eval_batch_size: 64
46
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
+ - lr_scheduler_type: cosine
48
+ - lr_scheduler_warmup_ratio: 0.1
49
+ - num_epochs: 1
50
+
51
+ ### Training results
52
+
53
+
54
+
55
+ ### Framework versions
56
+
57
+ - Transformers 4.44.1
58
+ - PyTorch 2.1.2+cu121
59
+ - Datasets 2.21.0
60
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9984431759211209,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.21933725711709495,
5
+ "train_runtime": 13808.8776,
6
+ "train_samples": 123309,
7
+ "train_samples_per_second": 8.93,
8
+ "train_steps_per_second": 0.035
9
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.44.1"
6
+ }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12d0aae0e68e71403ad885bd5e069ce863a06c9fb0650a4d588e22d58066d07b
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62159453a790aebabd2016cc736de009d9459341ec2bf712124d89b2cbdc83ba
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5ebf14a4a893299a32ab8e224b2c545c5b90b070eac1e251cd10aca2afc15df2
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd72e4e60522610a0d2942f1dadf1e8ccda6313cc22389fa3e5f0e54675f55df
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2a907ebe57823f6818ec75a87cf1cecd007b27a46f43bdbd486197aacc3236e
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:285ac92c2b4b108723bd8e44a6394393174d7de801645aa45dc70e498248ac28
3
  size 4540516344
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0.9984431759211209,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.21933725711709495,
5
+ "train_runtime": 13808.8776,
6
+ "train_samples": 123309,
7
+ "train_samples_per_second": 8.93,
8
+ "train_steps_per_second": 0.035
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,777 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.9984431759211209,
5
+ "eval_steps": 1000,
6
+ "global_step": 481,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0020757654385054488,
13
+ "grad_norm": 7.453312579539728,
14
+ "learning_rate": 1.020408163265306e-08,
15
+ "logits/chosen": -2.730942726135254,
16
+ "logits/rejected": -2.654609203338623,
17
+ "logps/chosen": -350.489990234375,
18
+ "logps/rejected": -325.546875,
19
+ "loss": 0.6931,
20
+ "rewards/accuracies": 0.0,
21
+ "rewards/chosen": 0.0,
22
+ "rewards/margins": 0.0,
23
+ "rewards/rejected": 0.0,
24
+ "step": 1
25
+ },
26
+ {
27
+ "epoch": 0.02075765438505449,
28
+ "grad_norm": 7.878888995232912,
29
+ "learning_rate": 1.0204081632653061e-07,
30
+ "logits/chosen": -2.7330236434936523,
31
+ "logits/rejected": -2.735116720199585,
32
+ "logps/chosen": -366.51531982421875,
33
+ "logps/rejected": -412.2677001953125,
34
+ "loss": 0.6931,
35
+ "rewards/accuracies": 0.4270833432674408,
36
+ "rewards/chosen": -0.000205132644623518,
37
+ "rewards/margins": 4.354613702162169e-05,
38
+ "rewards/rejected": -0.00024867875617928803,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.04151530877010898,
43
+ "grad_norm": 7.53294584904676,
44
+ "learning_rate": 2.0408163265306121e-07,
45
+ "logits/chosen": -2.7173304557800293,
46
+ "logits/rejected": -2.693912982940674,
47
+ "logps/chosen": -378.73748779296875,
48
+ "logps/rejected": -404.47003173828125,
49
+ "loss": 0.6892,
50
+ "rewards/accuracies": 0.762499988079071,
51
+ "rewards/chosen": 0.0025507560931146145,
52
+ "rewards/margins": 0.008084132336080074,
53
+ "rewards/rejected": -0.005533376708626747,
54
+ "step": 20
55
+ },
56
+ {
57
+ "epoch": 0.062272963155163466,
58
+ "grad_norm": 8.007471678418003,
59
+ "learning_rate": 3.0612244897959183e-07,
60
+ "logits/chosen": -2.716646194458008,
61
+ "logits/rejected": -2.700786590576172,
62
+ "logps/chosen": -363.6639709472656,
63
+ "logps/rejected": -390.54083251953125,
64
+ "loss": 0.6692,
65
+ "rewards/accuracies": 0.903124988079071,
66
+ "rewards/chosen": 0.02478734776377678,
67
+ "rewards/margins": 0.051134396344423294,
68
+ "rewards/rejected": -0.026347041130065918,
69
+ "step": 30
70
+ },
71
+ {
72
+ "epoch": 0.08303061754021795,
73
+ "grad_norm": 9.3622528637074,
74
+ "learning_rate": 4.0816326530612243e-07,
75
+ "logits/chosen": -2.7087109088897705,
76
+ "logits/rejected": -2.669712543487549,
77
+ "logps/chosen": -347.83538818359375,
78
+ "logps/rejected": -376.85260009765625,
79
+ "loss": 0.6044,
80
+ "rewards/accuracies": 0.875,
81
+ "rewards/chosen": 0.05488457530736923,
82
+ "rewards/margins": 0.19498120248317719,
83
+ "rewards/rejected": -0.14009663462638855,
84
+ "step": 40
85
+ },
86
+ {
87
+ "epoch": 0.10378827192527244,
88
+ "grad_norm": 11.969966849217528,
89
+ "learning_rate": 4.999933894080444e-07,
90
+ "logits/chosen": -2.7135281562805176,
91
+ "logits/rejected": -2.6938090324401855,
92
+ "logps/chosen": -403.2617492675781,
93
+ "logps/rejected": -495.21270751953125,
94
+ "loss": 0.4674,
95
+ "rewards/accuracies": 0.871874988079071,
96
+ "rewards/chosen": -0.37011662125587463,
97
+ "rewards/margins": 0.6785963177680969,
98
+ "rewards/rejected": -1.048712968826294,
99
+ "step": 50
100
+ },
101
+ {
102
+ "epoch": 0.12454592631032693,
103
+ "grad_norm": 19.176201042712012,
104
+ "learning_rate": 4.992005413014143e-07,
105
+ "logits/chosen": -2.7302985191345215,
106
+ "logits/rejected": -2.7273764610290527,
107
+ "logps/chosen": -528.5646362304688,
108
+ "logps/rejected": -741.4615478515625,
109
+ "loss": 0.3523,
110
+ "rewards/accuracies": 0.84375,
111
+ "rewards/chosen": -1.8138965368270874,
112
+ "rewards/margins": 1.7118844985961914,
113
+ "rewards/rejected": -3.5257811546325684,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 0.14530358069538143,
118
+ "grad_norm": 20.310776567548704,
119
+ "learning_rate": 4.970903776169402e-07,
120
+ "logits/chosen": -2.7460341453552246,
121
+ "logits/rejected": -2.7275753021240234,
122
+ "logps/chosen": -634.8268432617188,
123
+ "logps/rejected": -865.6439208984375,
124
+ "loss": 0.3052,
125
+ "rewards/accuracies": 0.84375,
126
+ "rewards/chosen": -2.382263422012329,
127
+ "rewards/margins": 2.2324626445770264,
128
+ "rewards/rejected": -4.6147260665893555,
129
+ "step": 70
130
+ },
131
+ {
132
+ "epoch": 0.1660612350804359,
133
+ "grad_norm": 19.886871444649117,
134
+ "learning_rate": 4.936740530314087e-07,
135
+ "logits/chosen": -2.3413853645324707,
136
+ "logits/rejected": -2.102804660797119,
137
+ "logps/chosen": -591.3840942382812,
138
+ "logps/rejected": -896.90625,
139
+ "loss": 0.25,
140
+ "rewards/accuracies": 0.903124988079071,
141
+ "rewards/chosen": -2.003624200820923,
142
+ "rewards/margins": 2.942108631134033,
143
+ "rewards/rejected": -4.945733070373535,
144
+ "step": 80
145
+ },
146
+ {
147
+ "epoch": 0.1868188894654904,
148
+ "grad_norm": 16.07417524586734,
149
+ "learning_rate": 4.889696268057348e-07,
150
+ "logits/chosen": -1.8468377590179443,
151
+ "logits/rejected": -1.1863611936569214,
152
+ "logps/chosen": -567.8921508789062,
153
+ "logps/rejected": -934.1871337890625,
154
+ "loss": 0.2254,
155
+ "rewards/accuracies": 0.893750011920929,
156
+ "rewards/chosen": -2.0263195037841797,
157
+ "rewards/margins": 3.4611504077911377,
158
+ "rewards/rejected": -5.487469673156738,
159
+ "step": 90
160
+ },
161
+ {
162
+ "epoch": 0.2075765438505449,
163
+ "grad_norm": 15.952464662940557,
164
+ "learning_rate": 4.830019673206996e-07,
165
+ "logits/chosen": -1.3128455877304077,
166
+ "logits/rejected": -0.37191733717918396,
167
+ "logps/chosen": -637.7520751953125,
168
+ "logps/rejected": -1123.6968994140625,
169
+ "loss": 0.2034,
170
+ "rewards/accuracies": 0.925000011920929,
171
+ "rewards/chosen": -2.622469425201416,
172
+ "rewards/margins": 4.605704307556152,
173
+ "rewards/rejected": -7.22817325592041,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 0.2283341982355994,
178
+ "grad_norm": 20.61911089038355,
179
+ "learning_rate": 4.7580262061854606e-07,
180
+ "logits/chosen": -0.8984780311584473,
181
+ "logits/rejected": 0.00587611785158515,
182
+ "logps/chosen": -629.09521484375,
183
+ "logps/rejected": -1123.9976806640625,
184
+ "loss": 0.2043,
185
+ "rewards/accuracies": 0.9312499761581421,
186
+ "rewards/chosen": -2.5760746002197266,
187
+ "rewards/margins": 4.712790012359619,
188
+ "rewards/rejected": -7.288865089416504,
189
+ "step": 110
190
+ },
191
+ {
192
+ "epoch": 0.24909185262065386,
193
+ "grad_norm": 13.519087980837655,
194
+ "learning_rate": 4.674096436453447e-07,
195
+ "logits/chosen": -0.8746647834777832,
196
+ "logits/rejected": 0.03333142027258873,
197
+ "logps/chosen": -662.9947509765625,
198
+ "logps/rejected": -1107.119140625,
199
+ "loss": 0.1947,
200
+ "rewards/accuracies": 0.90625,
201
+ "rewards/chosen": -2.6802916526794434,
202
+ "rewards/margins": 4.257796287536621,
203
+ "rewards/rejected": -6.938088417053223,
204
+ "step": 120
205
+ },
206
+ {
207
+ "epoch": 0.26984950700570837,
208
+ "grad_norm": 15.179032922942401,
209
+ "learning_rate": 4.578674030756363e-07,
210
+ "logits/chosen": -0.5216516256332397,
211
+ "logits/rejected": 0.797188401222229,
212
+ "logps/chosen": -672.4591064453125,
213
+ "logps/rejected": -1201.9649658203125,
214
+ "loss": 0.1766,
215
+ "rewards/accuracies": 0.90625,
216
+ "rewards/chosen": -2.92866849899292,
217
+ "rewards/margins": 5.020692348480225,
218
+ "rewards/rejected": -7.949362277984619,
219
+ "step": 130
220
+ },
221
+ {
222
+ "epoch": 0.29060716139076287,
223
+ "grad_norm": 19.394840025974254,
224
+ "learning_rate": 4.4722634078279865e-07,
225
+ "logits/chosen": 0.05672640725970268,
226
+ "logits/rejected": 1.2894923686981201,
227
+ "logps/chosen": -632.3123779296875,
228
+ "logps/rejected": -1134.365478515625,
229
+ "loss": 0.1989,
230
+ "rewards/accuracies": 0.8843749761581421,
231
+ "rewards/chosen": -2.6184778213500977,
232
+ "rewards/margins": 4.792341709136963,
233
+ "rewards/rejected": -7.410820007324219,
234
+ "step": 140
235
+ },
236
+ {
237
+ "epoch": 0.3113648157758173,
238
+ "grad_norm": 15.545837682982413,
239
+ "learning_rate": 4.355427071949004e-07,
240
+ "logits/chosen": -0.034926723688840866,
241
+ "logits/rejected": 1.3178081512451172,
242
+ "logps/chosen": -625.6254272460938,
243
+ "logps/rejected": -1133.8699951171875,
244
+ "loss": 0.1657,
245
+ "rewards/accuracies": 0.925000011920929,
246
+ "rewards/chosen": -2.654139757156372,
247
+ "rewards/margins": 4.885876655578613,
248
+ "rewards/rejected": -7.540017127990723,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 0.3321224701608718,
253
+ "grad_norm": 21.099719863638594,
254
+ "learning_rate": 4.228782639455674e-07,
255
+ "logits/chosen": -0.2264009416103363,
256
+ "logits/rejected": 1.3679448366165161,
257
+ "logps/chosen": -684.0534057617188,
258
+ "logps/rejected": -1281.2177734375,
259
+ "loss": 0.1738,
260
+ "rewards/accuracies": 0.934374988079071,
261
+ "rewards/chosen": -3.0014939308166504,
262
+ "rewards/margins": 5.716488838195801,
263
+ "rewards/rejected": -8.717982292175293,
264
+ "step": 160
265
+ },
266
+ {
267
+ "epoch": 0.3528801245459263,
268
+ "grad_norm": 14.4755316075222,
269
+ "learning_rate": 4.092999573916971e-07,
270
+ "logits/chosen": 0.14696760475635529,
271
+ "logits/rejected": 1.6602694988250732,
272
+ "logps/chosen": -664.4630126953125,
273
+ "logps/rejected": -1225.433837890625,
274
+ "loss": 0.1804,
275
+ "rewards/accuracies": 0.8999999761581421,
276
+ "rewards/chosen": -3.056241273880005,
277
+ "rewards/margins": 5.417787551879883,
278
+ "rewards/rejected": -8.474028587341309,
279
+ "step": 170
280
+ },
281
+ {
282
+ "epoch": 0.3736377789309808,
283
+ "grad_norm": 15.213191213812825,
284
+ "learning_rate": 3.948795647238637e-07,
285
+ "logits/chosen": -0.7323606014251709,
286
+ "logits/rejected": 1.1011723279953003,
287
+ "logps/chosen": -630.0668334960938,
288
+ "logps/rejected": -1234.099609375,
289
+ "loss": 0.1783,
290
+ "rewards/accuracies": 0.921875,
291
+ "rewards/chosen": -2.6342709064483643,
292
+ "rewards/margins": 5.844083786010742,
293
+ "rewards/rejected": -8.478353500366211,
294
+ "step": 180
295
+ },
296
+ {
297
+ "epoch": 0.39439543331603527,
298
+ "grad_norm": 16.709219865079046,
299
+ "learning_rate": 3.796933145401304e-07,
300
+ "logits/chosen": -0.12861236929893494,
301
+ "logits/rejected": 1.5260117053985596,
302
+ "logps/chosen": -731.4463500976562,
303
+ "logps/rejected": -1386.452880859375,
304
+ "loss": 0.1646,
305
+ "rewards/accuracies": 0.9375,
306
+ "rewards/chosen": -3.487692356109619,
307
+ "rewards/margins": 6.185595989227295,
308
+ "rewards/rejected": -9.673288345336914,
309
+ "step": 190
310
+ },
311
+ {
312
+ "epoch": 0.4151530877010898,
313
+ "grad_norm": 16.46330948268556,
314
+ "learning_rate": 3.638214838889801e-07,
315
+ "logits/chosen": 0.014425823464989662,
316
+ "logits/rejected": 1.5777640342712402,
317
+ "logps/chosen": -647.3234252929688,
318
+ "logps/rejected": -1219.0548095703125,
319
+ "loss": 0.1669,
320
+ "rewards/accuracies": 0.909375011920929,
321
+ "rewards/chosen": -2.797513484954834,
322
+ "rewards/margins": 5.429152011871338,
323
+ "rewards/rejected": -8.226665496826172,
324
+ "step": 200
325
+ },
326
+ {
327
+ "epoch": 0.4359107420861443,
328
+ "grad_norm": 19.39279809291309,
329
+ "learning_rate": 3.4734797391146383e-07,
330
+ "logits/chosen": -0.12084762752056122,
331
+ "logits/rejected": 1.6122562885284424,
332
+ "logps/chosen": -656.5711059570312,
333
+ "logps/rejected": -1250.8482666015625,
334
+ "loss": 0.1594,
335
+ "rewards/accuracies": 0.9281250238418579,
336
+ "rewards/chosen": -2.874763011932373,
337
+ "rewards/margins": 5.74980354309082,
338
+ "rewards/rejected": -8.624567031860352,
339
+ "step": 210
340
+ },
341
+ {
342
+ "epoch": 0.4566683964711988,
343
+ "grad_norm": 23.517592451341034,
344
+ "learning_rate": 3.3035986632579036e-07,
345
+ "logits/chosen": -1.0772771835327148,
346
+ "logits/rejected": 0.6209205389022827,
347
+ "logps/chosen": -622.2130126953125,
348
+ "logps/rejected": -1293.0042724609375,
349
+ "loss": 0.1678,
350
+ "rewards/accuracies": 0.921875,
351
+ "rewards/chosen": -2.5699634552001953,
352
+ "rewards/margins": 6.527965545654297,
353
+ "rewards/rejected": -9.097929000854492,
354
+ "step": 220
355
+ },
356
+ {
357
+ "epoch": 0.4774260508562532,
358
+ "grad_norm": 20.580951166094668,
359
+ "learning_rate": 3.1294696309885716e-07,
360
+ "logits/chosen": -1.1179264783859253,
361
+ "logits/rejected": 0.7691652178764343,
362
+ "logps/chosen": -666.9544677734375,
363
+ "logps/rejected": -1416.962158203125,
364
+ "loss": 0.1673,
365
+ "rewards/accuracies": 0.925000011920929,
366
+ "rewards/chosen": -3.058974027633667,
367
+ "rewards/margins": 7.343924522399902,
368
+ "rewards/rejected": -10.402898788452148,
369
+ "step": 230
370
+ },
371
+ {
372
+ "epoch": 0.49818370524130773,
373
+ "grad_norm": 14.600883148928759,
374
+ "learning_rate": 2.952013117380913e-07,
375
+ "logits/chosen": -0.9207614660263062,
376
+ "logits/rejected": 1.1953575611114502,
377
+ "logps/chosen": -662.4710693359375,
378
+ "logps/rejected": -1418.23291015625,
379
+ "loss": 0.1614,
380
+ "rewards/accuracies": 0.949999988079071,
381
+ "rewards/chosen": -3.0431346893310547,
382
+ "rewards/margins": 7.319464206695557,
383
+ "rewards/rejected": -10.362597465515137,
384
+ "step": 240
385
+ },
386
+ {
387
+ "epoch": 0.5189413596263622,
388
+ "grad_norm": 11.446046359523642,
389
+ "learning_rate": 2.7721671871299114e-07,
390
+ "logits/chosen": -0.7357327938079834,
391
+ "logits/rejected": 1.461576223373413,
392
+ "logps/chosen": -672.9193725585938,
393
+ "logps/rejected": -1338.949462890625,
394
+ "loss": 0.1602,
395
+ "rewards/accuracies": 0.940625011920929,
396
+ "rewards/chosen": -2.9312338829040527,
397
+ "rewards/margins": 6.449375152587891,
398
+ "rewards/rejected": -9.380608558654785,
399
+ "step": 250
400
+ },
401
+ {
402
+ "epoch": 0.5396990140114167,
403
+ "grad_norm": 14.757287503572078,
404
+ "learning_rate": 2.5908825357849993e-07,
405
+ "logits/chosen": -0.8231679797172546,
406
+ "logits/rejected": 1.1155385971069336,
407
+ "logps/chosen": -656.1690063476562,
408
+ "logps/rejected": -1271.033447265625,
409
+ "loss": 0.1622,
410
+ "rewards/accuracies": 0.9125000238418579,
411
+ "rewards/chosen": -2.8299801349639893,
412
+ "rewards/margins": 5.925788402557373,
413
+ "rewards/rejected": -8.755769729614258,
414
+ "step": 260
415
+ },
416
+ {
417
+ "epoch": 0.5604566683964712,
418
+ "grad_norm": 17.896435569322648,
419
+ "learning_rate": 2.409117464215001e-07,
420
+ "logits/chosen": -0.4632663130760193,
421
+ "logits/rejected": 1.819011926651001,
422
+ "logps/chosen": -664.986572265625,
423
+ "logps/rejected": -1455.3441162109375,
424
+ "loss": 0.1471,
425
+ "rewards/accuracies": 0.940625011920929,
426
+ "rewards/chosen": -3.113548517227173,
427
+ "rewards/margins": 7.589502811431885,
428
+ "rewards/rejected": -10.70305061340332,
429
+ "step": 270
430
+ },
431
+ {
432
+ "epoch": 0.5812143227815257,
433
+ "grad_norm": 18.08668716790038,
434
+ "learning_rate": 2.227832812870089e-07,
435
+ "logits/chosen": -0.542155385017395,
436
+ "logits/rejected": 1.9537347555160522,
437
+ "logps/chosen": -686.6569213867188,
438
+ "logps/rejected": -1460.84619140625,
439
+ "loss": 0.1557,
440
+ "rewards/accuracies": 0.9312499761581421,
441
+ "rewards/chosen": -3.106727123260498,
442
+ "rewards/margins": 7.513753414154053,
443
+ "rewards/rejected": -10.620479583740234,
444
+ "step": 280
445
+ },
446
+ {
447
+ "epoch": 0.6019719771665801,
448
+ "grad_norm": 20.414881165009998,
449
+ "learning_rate": 2.0479868826190871e-07,
450
+ "logits/chosen": -0.437448650598526,
451
+ "logits/rejected": 1.7888593673706055,
452
+ "logps/chosen": -709.882568359375,
453
+ "logps/rejected": -1392.702392578125,
454
+ "loss": 0.1617,
455
+ "rewards/accuracies": 0.918749988079071,
456
+ "rewards/chosen": -3.35121488571167,
457
+ "rewards/margins": 6.753846645355225,
458
+ "rewards/rejected": -10.105062484741211,
459
+ "step": 290
460
+ },
461
+ {
462
+ "epoch": 0.6227296315516346,
463
+ "grad_norm": 16.484968508125828,
464
+ "learning_rate": 1.8705303690114287e-07,
465
+ "logits/chosen": -0.2719939947128296,
466
+ "logits/rejected": 1.811428427696228,
467
+ "logps/chosen": -710.2088012695312,
468
+ "logps/rejected": -1419.313720703125,
469
+ "loss": 0.1478,
470
+ "rewards/accuracies": 0.9468749761581421,
471
+ "rewards/chosen": -3.3416149616241455,
472
+ "rewards/margins": 6.840612888336182,
473
+ "rewards/rejected": -10.182229042053223,
474
+ "step": 300
475
+ },
476
+ {
477
+ "epoch": 0.6434872859366891,
478
+ "grad_norm": 15.309368142287557,
479
+ "learning_rate": 1.6964013367420965e-07,
480
+ "logits/chosen": -0.341867595911026,
481
+ "logits/rejected": 1.7422988414764404,
482
+ "logps/chosen": -686.5745849609375,
483
+ "logps/rejected": -1366.7637939453125,
484
+ "loss": 0.1534,
485
+ "rewards/accuracies": 0.918749988079071,
486
+ "rewards/chosen": -3.167562961578369,
487
+ "rewards/margins": 6.726889133453369,
488
+ "rewards/rejected": -9.894452095031738,
489
+ "step": 310
490
+ },
491
+ {
492
+ "epoch": 0.6642449403217436,
493
+ "grad_norm": 11.4821386278306,
494
+ "learning_rate": 1.5265202608853628e-07,
495
+ "logits/chosen": -0.17296895384788513,
496
+ "logits/rejected": 1.9412486553192139,
497
+ "logps/chosen": -665.36376953125,
498
+ "logps/rejected": -1430.9146728515625,
499
+ "loss": 0.148,
500
+ "rewards/accuracies": 0.934374988079071,
501
+ "rewards/chosen": -3.0024094581604004,
502
+ "rewards/margins": 7.437635898590088,
503
+ "rewards/rejected": -10.440046310424805,
504
+ "step": 320
505
+ },
506
+ {
507
+ "epoch": 0.6850025947067981,
508
+ "grad_norm": 10.552793459289758,
509
+ "learning_rate": 1.3617851611101993e-07,
510
+ "logits/chosen": -0.5047305226325989,
511
+ "logits/rejected": 1.5836106538772583,
512
+ "logps/chosen": -686.6402587890625,
513
+ "logps/rejected": -1443.398193359375,
514
+ "loss": 0.1519,
515
+ "rewards/accuracies": 0.8968750238418579,
516
+ "rewards/chosen": -3.1413583755493164,
517
+ "rewards/margins": 7.2722015380859375,
518
+ "rewards/rejected": -10.413559913635254,
519
+ "step": 330
520
+ },
521
+ {
522
+ "epoch": 0.7057602490918526,
523
+ "grad_norm": 17.65296980495948,
524
+ "learning_rate": 1.2030668545986958e-07,
525
+ "logits/chosen": -0.569928765296936,
526
+ "logits/rejected": 1.6948425769805908,
527
+ "logps/chosen": -716.9683837890625,
528
+ "logps/rejected": -1518.8372802734375,
529
+ "loss": 0.1463,
530
+ "rewards/accuracies": 0.921875,
531
+ "rewards/chosen": -3.4350712299346924,
532
+ "rewards/margins": 7.720976829528809,
533
+ "rewards/rejected": -11.156047821044922,
534
+ "step": 340
535
+ },
536
+ {
537
+ "epoch": 0.7265179034769071,
538
+ "grad_norm": 14.715250965523067,
539
+ "learning_rate": 1.0512043527613623e-07,
540
+ "logits/chosen": -0.7549006342887878,
541
+ "logits/rejected": 1.3407833576202393,
542
+ "logps/chosen": -695.8525390625,
543
+ "logps/rejected": -1478.35546875,
544
+ "loss": 0.1559,
545
+ "rewards/accuracies": 0.903124988079071,
546
+ "rewards/chosen": -3.347827911376953,
547
+ "rewards/margins": 7.52248477935791,
548
+ "rewards/rejected": -10.870311737060547,
549
+ "step": 350
550
+ },
551
+ {
552
+ "epoch": 0.7472755578619616,
553
+ "grad_norm": 18.014896980938854,
554
+ "learning_rate": 9.070004260830294e-08,
555
+ "logits/chosen": -0.9223737716674805,
556
+ "logits/rejected": 1.4507310390472412,
557
+ "logps/chosen": -692.382080078125,
558
+ "logps/rejected": -1453.919921875,
559
+ "loss": 0.1437,
560
+ "rewards/accuracies": 0.925000011920929,
561
+ "rewards/chosen": -3.2790751457214355,
562
+ "rewards/margins": 7.526673316955566,
563
+ "rewards/rejected": -10.80574893951416,
564
+ "step": 360
565
+ },
566
+ {
567
+ "epoch": 0.768033212247016,
568
+ "grad_norm": 14.401106731580096,
569
+ "learning_rate": 7.712173605443267e-08,
570
+ "logits/chosen": -0.9376351237297058,
571
+ "logits/rejected": 1.583579421043396,
572
+ "logps/chosen": -700.8660888671875,
573
+ "logps/rejected": -1505.467529296875,
574
+ "loss": 0.1494,
575
+ "rewards/accuracies": 0.90625,
576
+ "rewards/chosen": -3.340554714202881,
577
+ "rewards/margins": 7.916815280914307,
578
+ "rewards/rejected": -11.257369995117188,
579
+ "step": 370
580
+ },
581
+ {
582
+ "epoch": 0.7887908666320705,
583
+ "grad_norm": 11.438125899827915,
584
+ "learning_rate": 6.445729280509957e-08,
585
+ "logits/chosen": -0.8953694105148315,
586
+ "logits/rejected": 1.4685465097427368,
587
+ "logps/chosen": -690.8338623046875,
588
+ "logps/rejected": -1588.8245849609375,
589
+ "loss": 0.1468,
590
+ "rewards/accuracies": 0.921875,
591
+ "rewards/chosen": -3.3841090202331543,
592
+ "rewards/margins": 8.392255783081055,
593
+ "rewards/rejected": -11.77636432647705,
594
+ "step": 380
595
+ },
596
+ {
597
+ "epoch": 0.809548521017125,
598
+ "grad_norm": 13.207322793003062,
599
+ "learning_rate": 5.2773659217201364e-08,
600
+ "logits/chosen": -0.9152933359146118,
601
+ "logits/rejected": 1.6746854782104492,
602
+ "logps/chosen": -707.5098266601562,
603
+ "logps/rejected": -1445.887939453125,
604
+ "loss": 0.1555,
605
+ "rewards/accuracies": 0.934374988079071,
606
+ "rewards/chosen": -3.3184616565704346,
607
+ "rewards/margins": 7.17882776260376,
608
+ "rewards/rejected": -10.497289657592773,
609
+ "step": 390
610
+ },
611
+ {
612
+ "epoch": 0.8303061754021795,
613
+ "grad_norm": 11.811171239173802,
614
+ "learning_rate": 4.213259692436366e-08,
615
+ "logits/chosen": -0.807452380657196,
616
+ "logits/rejected": 1.5794246196746826,
617
+ "logps/chosen": -692.3187255859375,
618
+ "logps/rejected": -1477.223388671875,
619
+ "loss": 0.1455,
620
+ "rewards/accuracies": 0.934374988079071,
621
+ "rewards/chosen": -3.2713589668273926,
622
+ "rewards/margins": 7.557108402252197,
623
+ "rewards/rejected": -10.82846736907959,
624
+ "step": 400
625
+ },
626
+ {
627
+ "epoch": 0.851063829787234,
628
+ "grad_norm": 13.900287224699019,
629
+ "learning_rate": 3.259035635465529e-08,
630
+ "logits/chosen": -0.6530941128730774,
631
+ "logits/rejected": 1.565045714378357,
632
+ "logps/chosen": -704.8355712890625,
633
+ "logps/rejected": -1375.3914794921875,
634
+ "loss": 0.1474,
635
+ "rewards/accuracies": 0.9125000238418579,
636
+ "rewards/chosen": -3.393775463104248,
637
+ "rewards/margins": 6.558309078216553,
638
+ "rewards/rejected": -9.952085494995117,
639
+ "step": 410
640
+ },
641
+ {
642
+ "epoch": 0.8718214841722886,
643
+ "grad_norm": 25.604242461790278,
644
+ "learning_rate": 2.4197379381453942e-08,
645
+ "logits/chosen": -0.6059257388114929,
646
+ "logits/rejected": 1.747667670249939,
647
+ "logps/chosen": -710.49365234375,
648
+ "logps/rejected": -1475.5013427734375,
649
+ "loss": 0.1551,
650
+ "rewards/accuracies": 0.918749988079071,
651
+ "rewards/chosen": -3.3643269538879395,
652
+ "rewards/margins": 7.329138278961182,
653
+ "rewards/rejected": -10.693464279174805,
654
+ "step": 420
655
+ },
656
+ {
657
+ "epoch": 0.892579138557343,
658
+ "grad_norm": 17.02837493916288,
659
+ "learning_rate": 1.699803267930039e-08,
660
+ "logits/chosen": -0.6381738781929016,
661
+ "logits/rejected": 1.581946611404419,
662
+ "logps/chosen": -700.8256225585938,
663
+ "logps/rejected": -1424.5767822265625,
664
+ "loss": 0.14,
665
+ "rewards/accuracies": 0.918749988079071,
666
+ "rewards/chosen": -3.278334140777588,
667
+ "rewards/margins": 7.011561393737793,
668
+ "rewards/rejected": -10.289896965026855,
669
+ "step": 430
670
+ },
671
+ {
672
+ "epoch": 0.9133367929423976,
673
+ "grad_norm": 12.115208573188879,
674
+ "learning_rate": 1.1030373194265114e-08,
675
+ "logits/chosen": -0.6104884743690491,
676
+ "logits/rejected": 1.8680555820465088,
677
+ "logps/chosen": -696.623046875,
678
+ "logps/rejected": -1484.712646484375,
679
+ "loss": 0.1555,
680
+ "rewards/accuracies": 0.925000011920929,
681
+ "rewards/chosen": -3.214094638824463,
682
+ "rewards/margins": 7.734494686126709,
683
+ "rewards/rejected": -10.948590278625488,
684
+ "step": 440
685
+ },
686
+ {
687
+ "epoch": 0.934094447327452,
688
+ "grad_norm": 16.53240755993655,
689
+ "learning_rate": 6.325946968591317e-09,
690
+ "logits/chosen": -0.5625468492507935,
691
+ "logits/rejected": 2.0248143672943115,
692
+ "logps/chosen": -701.9017333984375,
693
+ "logps/rejected": -1457.185791015625,
694
+ "loss": 0.1506,
695
+ "rewards/accuracies": 0.9437500238418579,
696
+ "rewards/chosen": -3.3476765155792236,
697
+ "rewards/margins": 7.459791660308838,
698
+ "rewards/rejected": -10.807466506958008,
699
+ "step": 450
700
+ },
701
+ {
702
+ "epoch": 0.9548521017125065,
703
+ "grad_norm": 13.032337936414088,
704
+ "learning_rate": 2.909622383059834e-09,
705
+ "logits/chosen": -0.5611749291419983,
706
+ "logits/rejected": 1.8944803476333618,
707
+ "logps/chosen": -692.7264404296875,
708
+ "logps/rejected": -1460.9708251953125,
709
+ "loss": 0.1466,
710
+ "rewards/accuracies": 0.9375,
711
+ "rewards/chosen": -3.228968858718872,
712
+ "rewards/margins": 7.5441155433654785,
713
+ "rewards/rejected": -10.77308464050293,
714
+ "step": 460
715
+ },
716
+ {
717
+ "epoch": 0.975609756097561,
718
+ "grad_norm": 17.252869051917436,
719
+ "learning_rate": 7.994586985856089e-10,
720
+ "logits/chosen": -0.6213638782501221,
721
+ "logits/rejected": 1.7617113590240479,
722
+ "logps/chosen": -707.3765869140625,
723
+ "logps/rejected": -1481.257080078125,
724
+ "loss": 0.1374,
725
+ "rewards/accuracies": 0.918749988079071,
726
+ "rewards/chosen": -3.3621768951416016,
727
+ "rewards/margins": 7.499251365661621,
728
+ "rewards/rejected": -10.861429214477539,
729
+ "step": 470
730
+ },
731
+ {
732
+ "epoch": 0.9963674104826155,
733
+ "grad_norm": 12.988129191271089,
734
+ "learning_rate": 6.610591955641398e-12,
735
+ "logits/chosen": -0.5171535015106201,
736
+ "logits/rejected": 1.7204986810684204,
737
+ "logps/chosen": -687.4800415039062,
738
+ "logps/rejected": -1466.64501953125,
739
+ "loss": 0.1487,
740
+ "rewards/accuracies": 0.9375,
741
+ "rewards/chosen": -3.355544328689575,
742
+ "rewards/margins": 7.49337911605835,
743
+ "rewards/rejected": -10.848923683166504,
744
+ "step": 480
745
+ },
746
+ {
747
+ "epoch": 0.9984431759211209,
748
+ "step": 481,
749
+ "total_flos": 0.0,
750
+ "train_loss": 0.21933725711709495,
751
+ "train_runtime": 13808.8776,
752
+ "train_samples_per_second": 8.93,
753
+ "train_steps_per_second": 0.035
754
+ }
755
+ ],
756
+ "logging_steps": 10,
757
+ "max_steps": 481,
758
+ "num_input_tokens_seen": 0,
759
+ "num_train_epochs": 1,
760
+ "save_steps": 100,
761
+ "stateful_callbacks": {
762
+ "TrainerControl": {
763
+ "args": {
764
+ "should_epoch_stop": false,
765
+ "should_evaluate": false,
766
+ "should_log": false,
767
+ "should_save": true,
768
+ "should_training_stop": true
769
+ },
770
+ "attributes": {}
771
+ }
772
+ },
773
+ "total_flos": 0.0,
774
+ "train_batch_size": 8,
775
+ "trial_name": null,
776
+ "trial_params": null
777
+ }