TTTXXX01 committed on
Commit
835f331
·
verified ·
1 Parent(s): 5dcb4cd

Model save

Browse files
README.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: alignment-handbook/zephyr-7b-sft-full
4
+ tags:
5
+ - trl
6
+ - dpo
7
+ - generated_from_trainer
8
+ model-index:
9
+ - name: DPO_Chat-zephyr-7b-sft-full
10
+ results: []
11
+ ---
12
+
13
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
14
+ should probably proofread and complete it, then remove this comment. -->
15
+
16
+ # DPO_Chat-zephyr-7b-sft-full
17
+
18
+ This model is a version of [alignment-handbook/zephyr-7b-sft-full](https://huggingface.co/alignment-handbook/zephyr-7b-sft-full) fine-tuned with DPO (Direct Preference Optimization) using TRL; the training dataset was not recorded by the Trainer.
19
+
20
+ ## Model description
21
+
22
+ More information needed
23
+
24
+ ## Intended uses & limitations
25
+
26
+ More information needed
27
+
28
+ ## Training and evaluation data
29
+
30
+ More information needed
31
+
32
+ ## Training procedure
33
+
34
+ ### Training hyperparameters
35
+
36
+ The following hyperparameters were used during training:
37
+ - learning_rate: 5e-07
38
+ - train_batch_size: 6
39
+ - eval_batch_size: 4
40
+ - seed: 42
41
+ - distributed_type: multi-GPU
42
+ - num_devices: 5
43
+ - gradient_accumulation_steps: 2
44
+ - total_train_batch_size: 60
45
+ - total_eval_batch_size: 20
46
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
47
+ - lr_scheduler_type: cosine
48
+ - lr_scheduler_warmup_ratio: 0.1
49
+ - num_epochs: 1
50
+
51
+ ### Training results
52
+
53
+
54
+
55
+ ### Framework versions
56
+
57
+ - Transformers 4.41.2
58
+ - Pytorch 2.3.0+cu121
59
+ - Datasets 2.19.1
60
+ - Tokenizers 0.19.1
all_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.5212765811586988,
5
+ "train_runtime": 13234.9919,
6
+ "train_samples": 61135,
7
+ "train_samples_per_second": 4.619,
8
+ "train_steps_per_second": 0.077
9
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.41.2"
6
+ }
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a83551a4e8191182d1d13b37211ef311a474d71b7e1ad37112bb172d71a80eb
3
+ size 4943162336
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ce64c0d27da7de387ae1c5731a5c8e0f15c8e1c1efcd099d934cebc01a05ecd
3
+ size 4999819336
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e002e50f41870a3cfdf4bdd33e43a0d7dd241b725191486af4047c87b2e69e49
3
+ size 4540516344
model.safetensors.index.json ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 14483464192
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00003-of-00003.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00003.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
13
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
14
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
15
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
16
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
17
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors",
18
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
19
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
20
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
21
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
22
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
23
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
24
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
25
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
26
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00003.safetensors",
27
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
28
+ "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
29
+ "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
30
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
31
+ "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
32
+ "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
33
+ "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
34
+ "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
35
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
36
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
37
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
38
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
39
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
40
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
41
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
42
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
43
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
44
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00003.safetensors",
45
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
46
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
47
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
48
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
49
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
50
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
51
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
52
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
53
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
54
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
55
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
56
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
57
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
58
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
59
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
60
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
61
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
62
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00003.safetensors",
63
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
64
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
65
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
66
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
67
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
68
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
69
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
70
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
71
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors",
72
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
73
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
74
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
75
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
76
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
77
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
78
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
79
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
80
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors",
81
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
82
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
83
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
84
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
85
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
86
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
87
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
88
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
89
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors",
90
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
91
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
92
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
93
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
94
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
95
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
96
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
97
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
98
+ "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
99
+ "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
100
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
101
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
102
+ "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
103
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
104
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
105
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
106
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
107
+ "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors",
108
+ "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
109
+ "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
110
+ "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
111
+ "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
112
+ "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
113
+ "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
114
+ "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
115
+ "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
116
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors",
117
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
118
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
119
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
120
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
121
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
122
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
123
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
124
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
125
+ "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors",
126
+ "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
127
+ "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
128
+ "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
129
+ "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
130
+ "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
131
+ "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
132
+ "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
133
+ "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
134
+ "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors",
135
+ "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
136
+ "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
137
+ "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
138
+ "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
139
+ "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
140
+ "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
141
+ "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
142
+ "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
143
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00003.safetensors",
144
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
145
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
146
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
147
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
148
+ "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
149
+ "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
150
+ "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
151
+ "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
152
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00003.safetensors",
153
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
154
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
155
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
156
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
157
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
158
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
159
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
160
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
161
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00003.safetensors",
162
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
163
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
164
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
165
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
166
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
167
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
168
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
169
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
170
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
171
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
172
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
173
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
174
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
175
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
176
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
177
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
178
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
179
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
180
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
181
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
182
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
183
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
184
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
185
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
186
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
187
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
188
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
189
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
190
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
191
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
192
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
193
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
194
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
195
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
196
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
197
+ "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
198
+ "model.layers.28.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
199
+ "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
200
+ "model.layers.28.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
201
+ "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
202
+ "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
203
+ "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
204
+ "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
205
+ "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
206
+ "model.layers.29.input_layernorm.weight": "model-00003-of-00003.safetensors",
207
+ "model.layers.29.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
208
+ "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
209
+ "model.layers.29.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
210
+ "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
211
+ "model.layers.29.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
212
+ "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
213
+ "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
214
+ "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
215
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
216
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
217
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
218
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
219
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
220
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
221
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
222
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
223
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
224
+ "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors",
225
+ "model.layers.30.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
226
+ "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
227
+ "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
228
+ "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
229
+ "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
230
+ "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
231
+ "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
232
+ "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
233
+ "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
234
+ "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
235
+ "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
236
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
237
+ "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
238
+ "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
239
+ "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
240
+ "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
241
+ "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
242
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors",
243
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
244
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
245
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
246
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
247
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
248
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
249
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
250
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
251
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors",
252
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
253
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
254
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
255
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
256
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
257
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
258
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
259
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
260
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors",
261
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
262
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
263
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
264
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
265
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
266
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
267
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
268
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
269
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors",
270
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
271
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
272
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
273
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
274
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
275
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
276
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
277
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
278
+ "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
279
+ "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
280
+ "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
281
+ "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
282
+ "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
283
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
284
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
285
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
286
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
287
+ "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
288
+ "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
289
+ "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
290
+ "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
291
+ "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
292
+ "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
293
+ "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00003.safetensors",
294
+ "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
295
+ "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
296
+ "model.norm.weight": "model-00003-of-00003.safetensors"
297
+ }
298
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 0.0,
4
+ "train_loss": 0.5212765811586988,
5
+ "train_runtime": 13234.9919,
6
+ "train_samples": 61135,
7
+ "train_samples_per_second": 4.619,
8
+ "train_steps_per_second": 0.077
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,1572 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1019,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0009813542688910696,
13
+ "grad_norm": 12.871247750249635,
14
+ "learning_rate": 4.9019607843137254e-09,
15
+ "logits/chosen": 5327.5185546875,
16
+ "logits/rejected": 3678.846435546875,
17
+ "logps/chosen": -222.31866455078125,
18
+ "logps/rejected": -157.3788299560547,
19
+ "loss": 0.6931,
20
+ "rewards/accuracies": 0.0,
21
+ "rewards/chosen": 0.0,
22
+ "rewards/margins": 0.0,
23
+ "rewards/rejected": 0.0,
24
+ "step": 1
25
+ },
26
+ {
27
+ "epoch": 0.009813542688910697,
28
+ "grad_norm": 14.081428099593134,
29
+ "learning_rate": 4.901960784313725e-08,
30
+ "logits/chosen": 5387.51123046875,
31
+ "logits/rejected": 4719.13525390625,
32
+ "logps/chosen": -280.157958984375,
33
+ "logps/rejected": -244.06271362304688,
34
+ "loss": 0.6931,
35
+ "rewards/accuracies": 0.40740740299224854,
36
+ "rewards/chosen": -0.014360553584992886,
37
+ "rewards/margins": -0.05316641554236412,
38
+ "rewards/rejected": 0.03880586475133896,
39
+ "step": 10
40
+ },
41
+ {
42
+ "epoch": 0.019627085377821395,
43
+ "grad_norm": 10.990615121540667,
44
+ "learning_rate": 9.80392156862745e-08,
45
+ "logits/chosen": 4691.1123046875,
46
+ "logits/rejected": 4289.6572265625,
47
+ "logps/chosen": -243.6353302001953,
48
+ "logps/rejected": -236.8662872314453,
49
+ "loss": 0.6931,
50
+ "rewards/accuracies": 0.46666669845581055,
51
+ "rewards/chosen": -0.03324734792113304,
52
+ "rewards/margins": -0.0356144905090332,
53
+ "rewards/rejected": 0.0023671439848840237,
54
+ "step": 20
55
+ },
56
+ {
57
+ "epoch": 0.029440628066732092,
58
+ "grad_norm": 12.484022522013351,
59
+ "learning_rate": 1.4705882352941175e-07,
60
+ "logits/chosen": 5969.29296875,
61
+ "logits/rejected": 5405.775390625,
62
+ "logps/chosen": -284.97119140625,
63
+ "logps/rejected": -282.4980163574219,
64
+ "loss": 0.6922,
65
+ "rewards/accuracies": 0.6250000596046448,
66
+ "rewards/chosen": 0.2023317515850067,
67
+ "rewards/margins": 0.21659104526042938,
68
+ "rewards/rejected": -0.014259283430874348,
69
+ "step": 30
70
+ },
71
+ {
72
+ "epoch": 0.03925417075564279,
73
+ "grad_norm": 13.51105908880634,
74
+ "learning_rate": 1.96078431372549e-07,
75
+ "logits/chosen": 5424.30859375,
76
+ "logits/rejected": 4093.165283203125,
77
+ "logps/chosen": -278.38232421875,
78
+ "logps/rejected": -219.98922729492188,
79
+ "loss": 0.6901,
80
+ "rewards/accuracies": 0.699999988079071,
81
+ "rewards/chosen": 0.5105813145637512,
82
+ "rewards/margins": 0.6657305955886841,
83
+ "rewards/rejected": -0.15514932572841644,
84
+ "step": 40
85
+ },
86
+ {
87
+ "epoch": 0.04906771344455348,
88
+ "grad_norm": 12.244316305452477,
89
+ "learning_rate": 2.4509803921568627e-07,
90
+ "logits/chosen": 5819.39111328125,
91
+ "logits/rejected": 4993.8203125,
92
+ "logps/chosen": -267.16241455078125,
93
+ "logps/rejected": -275.3472595214844,
94
+ "loss": 0.6865,
95
+ "rewards/accuracies": 0.6833333969116211,
96
+ "rewards/chosen": 1.6877946853637695,
97
+ "rewards/margins": 1.0000646114349365,
98
+ "rewards/rejected": 0.6877301931381226,
99
+ "step": 50
100
+ },
101
+ {
102
+ "epoch": 0.058881256133464184,
103
+ "grad_norm": 11.729075229552288,
104
+ "learning_rate": 2.941176470588235e-07,
105
+ "logits/chosen": 6246.43115234375,
106
+ "logits/rejected": 5279.3232421875,
107
+ "logps/chosen": -293.96044921875,
108
+ "logps/rejected": -250.30880737304688,
109
+ "loss": 0.6794,
110
+ "rewards/accuracies": 0.675000011920929,
111
+ "rewards/chosen": 4.434187889099121,
112
+ "rewards/margins": 2.814074993133545,
113
+ "rewards/rejected": 1.6201130151748657,
114
+ "step": 60
115
+ },
116
+ {
117
+ "epoch": 0.06869479882237488,
118
+ "grad_norm": 11.58284928755517,
119
+ "learning_rate": 3.431372549019608e-07,
120
+ "logits/chosen": 5581.76318359375,
121
+ "logits/rejected": 5016.42333984375,
122
+ "logps/chosen": -273.4932556152344,
123
+ "logps/rejected": -272.8643493652344,
124
+ "loss": 0.6728,
125
+ "rewards/accuracies": 0.7083333730697632,
126
+ "rewards/chosen": 4.158146858215332,
127
+ "rewards/margins": 5.391061782836914,
128
+ "rewards/rejected": -1.2329151630401611,
129
+ "step": 70
130
+ },
131
+ {
132
+ "epoch": 0.07850834151128558,
133
+ "grad_norm": 12.989777400848494,
134
+ "learning_rate": 3.92156862745098e-07,
135
+ "logits/chosen": 5730.53759765625,
136
+ "logits/rejected": 4633.5458984375,
137
+ "logps/chosen": -269.62908935546875,
138
+ "logps/rejected": -244.82156372070312,
139
+ "loss": 0.6613,
140
+ "rewards/accuracies": 0.7749999761581421,
141
+ "rewards/chosen": 1.3705421686172485,
142
+ "rewards/margins": 8.697429656982422,
143
+ "rewards/rejected": -7.326887607574463,
144
+ "step": 80
145
+ },
146
+ {
147
+ "epoch": 0.08832188420019627,
148
+ "grad_norm": 14.220448041653073,
149
+ "learning_rate": 4.4117647058823526e-07,
150
+ "logits/chosen": 5785.2666015625,
151
+ "logits/rejected": 5267.29931640625,
152
+ "logps/chosen": -262.34014892578125,
153
+ "logps/rejected": -285.23370361328125,
154
+ "loss": 0.6375,
155
+ "rewards/accuracies": 0.6833333373069763,
156
+ "rewards/chosen": -1.5268057584762573,
157
+ "rewards/margins": 10.330537796020508,
158
+ "rewards/rejected": -11.857342720031738,
159
+ "step": 90
160
+ },
161
+ {
162
+ "epoch": 0.09813542688910697,
163
+ "grad_norm": 15.81206433163477,
164
+ "learning_rate": 4.901960784313725e-07,
165
+ "logits/chosen": 5834.7822265625,
166
+ "logits/rejected": 4743.5556640625,
167
+ "logps/chosen": -311.53265380859375,
168
+ "logps/rejected": -305.3698425292969,
169
+ "loss": 0.6278,
170
+ "rewards/accuracies": 0.7666667103767395,
171
+ "rewards/chosen": -14.513681411743164,
172
+ "rewards/margins": 19.83308219909668,
173
+ "rewards/rejected": -34.346763610839844,
174
+ "step": 100
175
+ },
176
+ {
177
+ "epoch": 0.10794896957801767,
178
+ "grad_norm": 18.563361241995352,
179
+ "learning_rate": 4.999061090193831e-07,
180
+ "logits/chosen": 5575.4599609375,
181
+ "logits/rejected": 5340.49658203125,
182
+ "logps/chosen": -277.6549987792969,
183
+ "logps/rejected": -278.158447265625,
184
+ "loss": 0.6341,
185
+ "rewards/accuracies": 0.6166666746139526,
186
+ "rewards/chosen": -7.583652496337891,
187
+ "rewards/margins": 10.811137199401855,
188
+ "rewards/rejected": -18.394786834716797,
189
+ "step": 110
190
+ },
191
+ {
192
+ "epoch": 0.11776251226692837,
193
+ "grad_norm": 22.063065551890407,
194
+ "learning_rate": 4.995247977764035e-07,
195
+ "logits/chosen": 5714.29443359375,
196
+ "logits/rejected": 5232.7041015625,
197
+ "logps/chosen": -276.466552734375,
198
+ "logps/rejected": -295.88800048828125,
199
+ "loss": 0.6269,
200
+ "rewards/accuracies": 0.6583333015441895,
201
+ "rewards/chosen": -20.445241928100586,
202
+ "rewards/margins": 17.259180068969727,
203
+ "rewards/rejected": -37.70441818237305,
204
+ "step": 120
205
+ },
206
+ {
207
+ "epoch": 0.12757605495583907,
208
+ "grad_norm": 21.227897979315813,
209
+ "learning_rate": 4.988506452457066e-07,
210
+ "logits/chosen": 5282.2646484375,
211
+ "logits/rejected": 4814.9853515625,
212
+ "logps/chosen": -284.6465759277344,
213
+ "logps/rejected": -329.804931640625,
214
+ "loss": 0.6032,
215
+ "rewards/accuracies": 0.6666666865348816,
216
+ "rewards/chosen": -43.58851623535156,
217
+ "rewards/margins": 24.183361053466797,
218
+ "rewards/rejected": -67.7718734741211,
219
+ "step": 130
220
+ },
221
+ {
222
+ "epoch": 0.13738959764474976,
223
+ "grad_norm": 19.667832090255832,
224
+ "learning_rate": 4.9788444260996e-07,
225
+ "logits/chosen": 5482.5751953125,
226
+ "logits/rejected": 5381.85107421875,
227
+ "logps/chosen": -307.1512451171875,
228
+ "logps/rejected": -342.03619384765625,
229
+ "loss": 0.6036,
230
+ "rewards/accuracies": 0.6666667461395264,
231
+ "rewards/chosen": -47.92987823486328,
232
+ "rewards/margins": 22.427753448486328,
233
+ "rewards/rejected": -70.3576431274414,
234
+ "step": 140
235
+ },
236
+ {
237
+ "epoch": 0.14720314033366044,
238
+ "grad_norm": 25.463823735637064,
239
+ "learning_rate": 4.96627323800647e-07,
240
+ "logits/chosen": 5556.36572265625,
241
+ "logits/rejected": 4525.91796875,
242
+ "logps/chosen": -339.99114990234375,
243
+ "logps/rejected": -357.9053649902344,
244
+ "loss": 0.5659,
245
+ "rewards/accuracies": 0.73333340883255,
246
+ "rewards/chosen": -65.89563751220703,
247
+ "rewards/margins": 47.135643005371094,
248
+ "rewards/rejected": -113.0312728881836,
249
+ "step": 150
250
+ },
251
+ {
252
+ "epoch": 0.15701668302257116,
253
+ "grad_norm": 26.781231387453232,
254
+ "learning_rate": 4.95080764167289e-07,
255
+ "logits/chosen": 6055.6474609375,
256
+ "logits/rejected": 5491.48046875,
257
+ "logps/chosen": -350.4269104003906,
258
+ "logps/rejected": -381.8998107910156,
259
+ "loss": 0.5603,
260
+ "rewards/accuracies": 0.6083333492279053,
261
+ "rewards/chosen": -62.65166473388672,
262
+ "rewards/margins": 36.19008255004883,
263
+ "rewards/rejected": -98.84175109863281,
264
+ "step": 160
265
+ },
266
+ {
267
+ "epoch": 0.16683022571148184,
268
+ "grad_norm": 28.113973023052374,
269
+ "learning_rate": 4.932465787459808e-07,
270
+ "logits/chosen": 5991.466796875,
271
+ "logits/rejected": 5234.6416015625,
272
+ "logps/chosen": -302.66656494140625,
273
+ "logps/rejected": -343.98358154296875,
274
+ "loss": 0.5567,
275
+ "rewards/accuracies": 0.7250000238418579,
276
+ "rewards/chosen": -39.85801315307617,
277
+ "rewards/margins": 54.232994079589844,
278
+ "rewards/rejected": -94.09100341796875,
279
+ "step": 170
280
+ },
281
+ {
282
+ "epoch": 0.17664376840039253,
283
+ "grad_norm": 33.999159471041786,
284
+ "learning_rate": 4.911269201292724e-07,
285
+ "logits/chosen": 5687.16943359375,
286
+ "logits/rejected": 5025.896484375,
287
+ "logps/chosen": -303.44134521484375,
288
+ "logps/rejected": -364.39190673828125,
289
+ "loss": 0.5816,
290
+ "rewards/accuracies": 0.6666666865348816,
291
+ "rewards/chosen": -62.167022705078125,
292
+ "rewards/margins": 51.05744552612305,
293
+ "rewards/rejected": -113.2244644165039,
294
+ "step": 180
295
+ },
296
+ {
297
+ "epoch": 0.18645731108930325,
298
+ "grad_norm": 29.641088692190937,
299
+ "learning_rate": 4.887242759398945e-07,
300
+ "logits/chosen": 6036.60205078125,
301
+ "logits/rejected": 5355.47216796875,
302
+ "logps/chosen": -337.2464294433594,
303
+ "logps/rejected": -388.3368835449219,
304
+ "loss": 0.5383,
305
+ "rewards/accuracies": 0.7333332896232605,
306
+ "rewards/chosen": -57.11214065551758,
307
+ "rewards/margins": 51.385711669921875,
308
+ "rewards/rejected": -108.49784851074219,
309
+ "step": 190
310
+ },
311
+ {
312
+ "epoch": 0.19627085377821393,
313
+ "grad_norm": 44.68902740164567,
314
+ "learning_rate": 4.860414659112948e-07,
315
+ "logits/chosen": 6272.4951171875,
316
+ "logits/rejected": 5538.49609375,
317
+ "logps/chosen": -370.70849609375,
318
+ "logps/rejected": -407.4710998535156,
319
+ "loss": 0.5638,
320
+ "rewards/accuracies": 0.7250000238418579,
321
+ "rewards/chosen": -84.62608337402344,
322
+ "rewards/margins": 62.12910079956055,
323
+ "rewards/rejected": -146.75518798828125,
324
+ "step": 200
325
+ },
326
+ {
327
+ "epoch": 0.20608439646712462,
328
+ "grad_norm": 25.411968927521745,
329
+ "learning_rate": 4.830816385784104e-07,
330
+ "logits/chosen": 4968.16015625,
331
+ "logits/rejected": 4779.7099609375,
332
+ "logps/chosen": -331.57757568359375,
333
+ "logps/rejected": -343.7427062988281,
334
+ "loss": 0.5589,
335
+ "rewards/accuracies": 0.6583333611488342,
336
+ "rewards/chosen": -78.5858383178711,
337
+ "rewards/margins": 39.72159957885742,
338
+ "rewards/rejected": -118.30744934082031,
339
+ "step": 210
340
+ },
341
+ {
342
+ "epoch": 0.21589793915603533,
343
+ "grad_norm": 37.405063992584424,
344
+ "learning_rate": 4.798482675825602e-07,
345
+ "logits/chosen": 5361.2626953125,
346
+ "logits/rejected": 5484.0341796875,
347
+ "logps/chosen": -311.9710388183594,
348
+ "logps/rejected": -405.7643127441406,
349
+ "loss": 0.5245,
350
+ "rewards/accuracies": 0.7833333611488342,
351
+ "rewards/chosen": -71.02371215820312,
352
+ "rewards/margins": 85.1280746459961,
353
+ "rewards/rejected": -156.1517791748047,
354
+ "step": 220
355
+ },
356
+ {
357
+ "epoch": 0.22571148184494602,
358
+ "grad_norm": 50.62400555207534,
359
+ "learning_rate": 4.7634514759479275e-07,
360
+ "logits/chosen": 6291.7314453125,
361
+ "logits/rejected": 4984.1982421875,
362
+ "logps/chosen": -361.0018615722656,
363
+ "logps/rejected": -410.3404846191406,
364
+ "loss": 0.5001,
365
+ "rewards/accuracies": 0.7500000596046448,
366
+ "rewards/chosen": -79.9288330078125,
367
+ "rewards/margins": 78.11891174316406,
368
+ "rewards/rejected": -158.04774475097656,
369
+ "step": 230
370
+ },
371
+ {
372
+ "epoch": 0.23552502453385674,
373
+ "grad_norm": 27.660916558165752,
374
+ "learning_rate": 4.7257638986247684e-07,
375
+ "logits/chosen": 6535.8984375,
376
+ "logits/rejected": 5374.02294921875,
377
+ "logps/chosen": -426.83148193359375,
378
+ "logps/rejected": -457.632080078125,
379
+ "loss": 0.516,
380
+ "rewards/accuracies": 0.7083333730697632,
381
+ "rewards/chosen": -109.15034484863281,
382
+ "rewards/margins": 92.73652648925781,
383
+ "rewards/rejected": -201.88687133789062,
384
+ "step": 240
385
+ },
386
+ {
387
+ "epoch": 0.24533856722276742,
388
+ "grad_norm": 31.148002019742822,
389
+ "learning_rate": 4.685464173843574e-07,
390
+ "logits/chosen": 5497.865234375,
391
+ "logits/rejected": 4737.041015625,
392
+ "logps/chosen": -371.4256591796875,
393
+ "logps/rejected": -383.71661376953125,
394
+ "loss": 0.5543,
395
+ "rewards/accuracies": 0.6166666746139526,
396
+ "rewards/chosen": -120.2467269897461,
397
+ "rewards/margins": 42.96506881713867,
398
+ "rewards/rejected": -163.21180725097656,
399
+ "step": 250
400
+ },
401
+ {
402
+ "epoch": 0.25515210991167814,
403
+ "grad_norm": 31.571604145354506,
404
+ "learning_rate": 4.6425995971974265e-07,
405
+ "logits/chosen": 5646.7626953125,
406
+ "logits/rejected": 5109.78369140625,
407
+ "logps/chosen": -389.2139587402344,
408
+ "logps/rejected": -417.52374267578125,
409
+ "loss": 0.5557,
410
+ "rewards/accuracies": 0.6333333253860474,
411
+ "rewards/chosen": -108.80330657958984,
412
+ "rewards/margins": 49.87716293334961,
413
+ "rewards/rejected": -158.6804656982422,
414
+ "step": 260
415
+ },
416
+ {
417
+ "epoch": 0.2649656526005888,
418
+ "grad_norm": 35.26099277826172,
419
+ "learning_rate": 4.597220474379125e-07,
420
+ "logits/chosen": 5891.14990234375,
421
+ "logits/rejected": 4710.44384765625,
422
+ "logps/chosen": -349.8431701660156,
423
+ "logps/rejected": -394.3140869140625,
424
+ "loss": 0.5564,
425
+ "rewards/accuracies": 0.6416667103767395,
426
+ "rewards/chosen": -90.45411682128906,
427
+ "rewards/margins": 59.028533935546875,
428
+ "rewards/rejected": -149.48263549804688,
429
+ "step": 270
430
+ },
431
+ {
432
+ "epoch": 0.2747791952894995,
433
+ "grad_norm": 48.24359930236473,
434
+ "learning_rate": 4.549380062142627e-07,
435
+ "logits/chosen": 5449.0,
436
+ "logits/rejected": 4662.09521484375,
437
+ "logps/chosen": -345.41461181640625,
438
+ "logps/rejected": -420.5967712402344,
439
+ "loss": 0.5258,
440
+ "rewards/accuracies": 0.7250000238418579,
441
+ "rewards/chosen": -90.24136352539062,
442
+ "rewards/margins": 88.40531158447266,
443
+ "rewards/rejected": -178.64669799804688,
444
+ "step": 280
445
+ },
446
+ {
447
+ "epoch": 0.2845927379784102,
448
+ "grad_norm": 29.807942673069554,
449
+ "learning_rate": 4.499134505801141e-07,
450
+ "logits/chosen": 6478.8251953125,
451
+ "logits/rejected": 5145.69580078125,
452
+ "logps/chosen": -425.2914123535156,
453
+ "logps/rejected": -475.61224365234375,
454
+ "loss": 0.5069,
455
+ "rewards/accuracies": 0.7750000357627869,
456
+ "rewards/chosen": -114.30363464355469,
457
+ "rewards/margins": 90.24967193603516,
458
+ "rewards/rejected": -204.55331420898438,
459
+ "step": 290
460
+ },
461
+ {
462
+ "epoch": 0.2944062806673209,
463
+ "grad_norm": 35.28572084013644,
464
+ "learning_rate": 4.4465427733352124e-07,
465
+ "logits/chosen": 5390.82275390625,
466
+ "logits/rejected": 5010.67919921875,
467
+ "logps/chosen": -404.703125,
468
+ "logps/rejected": -445.12921142578125,
469
+ "loss": 0.5624,
470
+ "rewards/accuracies": 0.6416667103767395,
471
+ "rewards/chosen": -142.99755859375,
472
+ "rewards/margins": 53.75908279418945,
473
+ "rewards/rejected": -196.75662231445312,
474
+ "step": 300
475
+ },
476
+ {
477
+ "epoch": 0.3042198233562316,
478
+ "grad_norm": 40.084769146081335,
479
+ "learning_rate": 4.391666586188145e-07,
480
+ "logits/chosen": 5972.5166015625,
481
+ "logits/rejected": 5158.81103515625,
482
+ "logps/chosen": -387.8962707519531,
483
+ "logps/rejected": -440.2579040527344,
484
+ "loss": 0.515,
485
+ "rewards/accuracies": 0.6916667222976685,
486
+ "rewards/chosen": -106.87181091308594,
487
+ "rewards/margins": 80.19728088378906,
488
+ "rewards/rejected": -187.06912231445312,
489
+ "step": 310
490
+ },
491
+ {
492
+ "epoch": 0.3140333660451423,
493
+ "grad_norm": 28.62016537121461,
494
+ "learning_rate": 4.3345703468299634e-07,
495
+ "logits/chosen": 5544.9384765625,
496
+ "logits/rejected": 4833.5224609375,
497
+ "logps/chosen": -360.482421875,
498
+ "logps/rejected": -389.13385009765625,
499
+ "loss": 0.5356,
500
+ "rewards/accuracies": 0.7333332896232605,
501
+ "rewards/chosen": -99.99230194091797,
502
+ "rewards/margins": 53.11186599731445,
503
+ "rewards/rejected": -153.10415649414062,
504
+ "step": 320
505
+ },
506
+ {
507
+ "epoch": 0.323846908734053,
508
+ "grad_norm": 59.032426150017336,
509
+ "learning_rate": 4.275321063174936e-07,
510
+ "logits/chosen": 5484.0458984375,
511
+ "logits/rejected": 4950.25537109375,
512
+ "logps/chosen": -403.98785400390625,
513
+ "logps/rejected": -519.8248291015625,
514
+ "loss": 0.4914,
515
+ "rewards/accuracies": 0.7333333492279053,
516
+ "rewards/chosen": -141.48928833007812,
517
+ "rewards/margins": 106.94425964355469,
518
+ "rewards/rejected": -248.4335479736328,
519
+ "step": 330
520
+ },
521
+ {
522
+ "epoch": 0.3336604514229637,
523
+ "grad_norm": 31.879122250786,
524
+ "learning_rate": 4.2139882699413613e-07,
525
+ "logits/chosen": 5405.72265625,
526
+ "logits/rejected": 4280.78857421875,
527
+ "logps/chosen": -441.2384338378906,
528
+ "logps/rejected": -502.58209228515625,
529
+ "loss": 0.5114,
530
+ "rewards/accuracies": 0.7750000357627869,
531
+ "rewards/chosen": -169.95379638671875,
532
+ "rewards/margins": 94.90937805175781,
533
+ "rewards/rejected": -264.8631286621094,
534
+ "step": 340
535
+ },
536
+ {
537
+ "epoch": 0.3434739941118744,
538
+ "grad_norm": 33.81913218379805,
539
+ "learning_rate": 4.1506439470459056e-07,
540
+ "logits/chosen": 6440.4052734375,
541
+ "logits/rejected": 4974.0732421875,
542
+ "logps/chosen": -476.2796325683594,
543
+ "logps/rejected": -503.81915283203125,
544
+ "loss": 0.4891,
545
+ "rewards/accuracies": 0.783333420753479,
546
+ "rewards/chosen": -159.8102569580078,
547
+ "rewards/margins": 94.52180480957031,
548
+ "rewards/rejected": -254.3320770263672,
549
+ "step": 350
550
+ },
551
+ {
552
+ "epoch": 0.35328753680078506,
553
+ "grad_norm": 27.826546606872803,
554
+ "learning_rate": 4.085362435128262e-07,
555
+ "logits/chosen": 5557.1865234375,
556
+ "logits/rejected": 5118.06640625,
557
+ "logps/chosen": -378.259033203125,
558
+ "logps/rejected": -449.55145263671875,
559
+ "loss": 0.5236,
560
+ "rewards/accuracies": 0.6666666865348816,
561
+ "rewards/chosen": -135.6257781982422,
562
+ "rewards/margins": 67.34868621826172,
563
+ "rewards/rejected": -202.97447204589844,
564
+ "step": 360
565
+ },
566
+ {
567
+ "epoch": 0.3631010794896958,
568
+ "grad_norm": 35.17289058393036,
569
+ "learning_rate": 4.0182203483052825e-07,
570
+ "logits/chosen": 6366.83056640625,
571
+ "logits/rejected": 5257.0703125,
572
+ "logps/chosen": -399.1999206542969,
573
+ "logps/rejected": -466.55255126953125,
574
+ "loss": 0.4778,
575
+ "rewards/accuracies": 0.8083333969116211,
576
+ "rewards/chosen": -125.0527572631836,
577
+ "rewards/margins": 93.51255798339844,
578
+ "rewards/rejected": -218.56527709960938,
579
+ "step": 370
580
+ },
581
+ {
582
+ "epoch": 0.3729146221786065,
583
+ "grad_norm": 38.44052471860425,
584
+ "learning_rate": 3.949296484256959e-07,
585
+ "logits/chosen": 5621.7138671875,
586
+ "logits/rejected": 5390.65478515625,
587
+ "logps/chosen": -457.83837890625,
588
+ "logps/rejected": -548.638427734375,
589
+ "loss": 0.5489,
590
+ "rewards/accuracies": 0.7583333253860474,
591
+ "rewards/chosen": -193.8985137939453,
592
+ "rewards/margins": 95.32550048828125,
593
+ "rewards/rejected": -289.2240295410156,
594
+ "step": 380
595
+ },
596
+ {
597
+ "epoch": 0.38272816486751715,
598
+ "grad_norm": 36.66931449629898,
599
+ "learning_rate": 3.8786717317497875e-07,
600
+ "logits/chosen": 5111.90576171875,
601
+ "logits/rejected": 4626.9228515625,
602
+ "logps/chosen": -434.62835693359375,
603
+ "logps/rejected": -526.7476806640625,
604
+ "loss": 0.4832,
605
+ "rewards/accuracies": 0.7500000596046448,
606
+ "rewards/chosen": -197.80410766601562,
607
+ "rewards/margins": 103.44625091552734,
608
+ "rewards/rejected": -301.2503662109375,
609
+ "step": 390
610
+ },
611
+ {
612
+ "epoch": 0.39254170755642787,
613
+ "grad_norm": 57.59980893341699,
614
+ "learning_rate": 3.806428975706042e-07,
615
+ "logits/chosen": 6388.87158203125,
616
+ "logits/rejected": 4657.2216796875,
617
+ "logps/chosen": -454.86175537109375,
618
+ "logps/rejected": -485.8328552246094,
619
+ "loss": 0.4911,
620
+ "rewards/accuracies": 0.7583333253860474,
621
+ "rewards/chosen": -156.0497283935547,
622
+ "rewards/margins": 103.42845153808594,
623
+ "rewards/rejected": -259.4781494140625,
624
+ "step": 400
625
+ },
626
+ {
627
+ "epoch": 0.4023552502453386,
628
+ "grad_norm": 39.54551097407698,
629
+ "learning_rate": 3.7326529999303633e-07,
630
+ "logits/chosen": 6277.59228515625,
631
+ "logits/rejected": 5186.5234375,
632
+ "logps/chosen": -436.134521484375,
633
+ "logps/rejected": -489.72021484375,
634
+ "loss": 0.5039,
635
+ "rewards/accuracies": 0.7583333253860474,
636
+ "rewards/chosen": -157.28512573242188,
637
+ "rewards/margins": 91.26525115966797,
638
+ "rewards/rejected": -248.55038452148438,
639
+ "step": 410
640
+ },
641
+ {
642
+ "epoch": 0.41216879293424924,
643
+ "grad_norm": 28.9828512823159,
644
+ "learning_rate": 3.6574303876078366e-07,
645
+ "logits/chosen": 6166.8349609375,
646
+ "logits/rejected": 5749.53759765625,
647
+ "logps/chosen": -429.44842529296875,
648
+ "logps/rejected": -501.61553955078125,
649
+ "loss": 0.5346,
650
+ "rewards/accuracies": 0.7250000238418579,
651
+ "rewards/chosen": -146.19595336914062,
652
+ "rewards/margins": 76.49540710449219,
653
+ "rewards/rejected": -222.6913604736328,
654
+ "step": 420
655
+ },
656
+ {
657
+ "epoch": 0.42198233562315995,
658
+ "grad_norm": 38.109059334795454,
659
+ "learning_rate": 3.5808494196903117e-07,
660
+ "logits/chosen": 5872.1611328125,
661
+ "logits/rejected": 5257.36962890625,
662
+ "logps/chosen": -426.01629638671875,
663
+ "logps/rejected": -524.32470703125,
664
+ "loss": 0.4893,
665
+ "rewards/accuracies": 0.7249999642372131,
666
+ "rewards/chosen": -147.16848754882812,
667
+ "rewards/margins": 97.02165222167969,
668
+ "rewards/rejected": -244.1901397705078,
669
+ "step": 430
670
+ },
671
+ {
672
+ "epoch": 0.43179587831207067,
673
+ "grad_norm": 36.48516188691919,
674
+ "learning_rate": 3.5029999712902387e-07,
675
+ "logits/chosen": 5825.1708984375,
676
+ "logits/rejected": 5375.9892578125,
677
+ "logps/chosen": -412.44866943359375,
678
+ "logps/rejected": -492.4132385253906,
679
+ "loss": 0.5089,
680
+ "rewards/accuracies": 0.7583333849906921,
681
+ "rewards/chosen": -137.3380584716797,
682
+ "rewards/margins": 78.8681640625,
683
+ "rewards/rejected": -216.2062225341797,
684
+ "step": 440
685
+ },
686
+ {
687
+ "epoch": 0.44160942100098133,
688
+ "grad_norm": 32.455988556697044,
689
+ "learning_rate": 3.4239734062036067e-07,
690
+ "logits/chosen": 5395.5947265625,
691
+ "logits/rejected": 4995.49267578125,
692
+ "logps/chosen": -377.55206298828125,
693
+ "logps/rejected": -480.9095764160156,
694
+ "loss": 0.504,
695
+ "rewards/accuracies": 0.8166666030883789,
696
+ "rewards/chosen": -116.18603515625,
697
+ "rewards/margins": 91.22578430175781,
698
+ "rewards/rejected": -207.4118194580078,
699
+ "step": 450
700
+ },
701
+ {
702
+ "epoch": 0.45142296368989204,
703
+ "grad_norm": 40.54519554942862,
704
+ "learning_rate": 3.343862469685755e-07,
705
+ "logits/chosen": 5598.1201171875,
706
+ "logits/rejected": 5239.931640625,
707
+ "logps/chosen": -418.7256774902344,
708
+ "logps/rejected": -504.2228088378906,
709
+ "loss": 0.492,
710
+ "rewards/accuracies": 0.7749999761581421,
711
+ "rewards/chosen": -155.0460662841797,
712
+ "rewards/margins": 88.58525848388672,
713
+ "rewards/rejected": -243.63131713867188,
714
+ "step": 460
715
+ },
716
+ {
717
+ "epoch": 0.46123650637880276,
718
+ "grad_norm": 63.388353478656484,
719
+ "learning_rate": 3.2627611796059283e-07,
720
+ "logits/chosen": 6118.2041015625,
721
+ "logits/rejected": 4867.0166015625,
722
+ "logps/chosen": -513.0584716796875,
723
+ "logps/rejected": -580.1785278320312,
724
+ "loss": 0.4852,
725
+ "rewards/accuracies": 0.6833333373069763,
726
+ "rewards/chosen": -215.5017547607422,
727
+ "rewards/margins": 102.20874786376953,
728
+ "rewards/rejected": -317.71051025390625,
729
+ "step": 470
730
+ },
731
+ {
732
+ "epoch": 0.47105004906771347,
733
+ "grad_norm": 53.006728665147484,
734
+ "learning_rate": 3.1807647161082797e-07,
735
+ "logits/chosen": 6796.0439453125,
736
+ "logits/rejected": 4991.43505859375,
737
+ "logps/chosen": -505.2491760253906,
738
+ "logps/rejected": -591.8897094726562,
739
+ "loss": 0.467,
740
+ "rewards/accuracies": 0.7666667103767395,
741
+ "rewards/chosen": -224.52243041992188,
742
+ "rewards/margins": 122.7418441772461,
743
+ "rewards/rejected": -347.2642822265625,
744
+ "step": 480
745
+ },
746
+ {
747
+ "epoch": 0.48086359175662413,
748
+ "grad_norm": 45.433668803945835,
749
+ "learning_rate": 3.097969309908847e-07,
750
+ "logits/chosen": 6198.6357421875,
751
+ "logits/rejected": 4938.93701171875,
752
+ "logps/chosen": -537.33154296875,
753
+ "logps/rejected": -604.7870483398438,
754
+ "loss": 0.4907,
755
+ "rewards/accuracies": 0.7750000357627869,
756
+ "rewards/chosen": -252.5094451904297,
757
+ "rewards/margins": 102.22807312011719,
758
+ "rewards/rejected": -354.7375183105469,
759
+ "step": 490
760
+ },
761
+ {
762
+ "epoch": 0.49067713444553485,
763
+ "grad_norm": 40.30222762902004,
764
+ "learning_rate": 3.01447212935957e-07,
765
+ "logits/chosen": 5542.91015625,
766
+ "logits/rejected": 4886.0283203125,
767
+ "logps/chosen": -515.2832641601562,
768
+ "logps/rejected": -611.3238525390625,
769
+ "loss": 0.4764,
770
+ "rewards/accuracies": 0.8166667222976685,
771
+ "rewards/chosen": -230.98831176757812,
772
+ "rewards/margins": 120.3188247680664,
773
+ "rewards/rejected": -351.30718994140625,
774
+ "step": 500
775
+ },
776
+ {
777
+ "epoch": 0.5004906771344455,
778
+ "grad_norm": 37.25943332077513,
779
+ "learning_rate": 2.930371166411915e-07,
780
+ "logits/chosen": 6290.35107421875,
781
+ "logits/rejected": 5406.603515625,
782
+ "logps/chosen": -496.71923828125,
783
+ "logps/rejected": -568.7760009765625,
784
+ "loss": 0.5204,
785
+ "rewards/accuracies": 0.7416666746139526,
786
+ "rewards/chosen": -171.8844451904297,
787
+ "rewards/margins": 106.14483642578125,
788
+ "rewards/rejected": -278.02923583984375,
789
+ "step": 510
790
+ },
791
+ {
792
+ "epoch": 0.5103042198233563,
793
+ "grad_norm": 34.219197608369974,
794
+ "learning_rate": 2.845765121613912e-07,
795
+ "logits/chosen": 5363.45361328125,
796
+ "logits/rejected": 4926.47705078125,
797
+ "logps/chosen": -400.9844665527344,
798
+ "logps/rejected": -468.4186096191406,
799
+ "loss": 0.4843,
800
+ "rewards/accuracies": 0.7583333253860474,
801
+ "rewards/chosen": -137.99566650390625,
802
+ "rewards/margins": 86.77244567871094,
803
+ "rewards/rejected": -224.7681121826172,
804
+ "step": 520
805
+ },
806
+ {
807
+ "epoch": 0.5201177625122669,
808
+ "grad_norm": 51.08115197166243,
809
+ "learning_rate": 2.760753288275598e-07,
810
+ "logits/chosen": 6380.15380859375,
811
+ "logits/rejected": 5523.56103515625,
812
+ "logps/chosen": -411.37030029296875,
813
+ "logps/rejected": -479.7333984375,
814
+ "loss": 0.524,
815
+ "rewards/accuracies": 0.7166666984558105,
816
+ "rewards/chosen": -120.66414642333984,
817
+ "rewards/margins": 82.25830841064453,
818
+ "rewards/rejected": -202.92245483398438,
819
+ "step": 530
820
+ },
821
+ {
822
+ "epoch": 0.5299313052011776,
823
+ "grad_norm": 32.10452041832907,
824
+ "learning_rate": 2.675435435938788e-07,
825
+ "logits/chosen": 5805.7861328125,
826
+ "logits/rejected": 4628.6015625,
827
+ "logps/chosen": -400.0195617675781,
828
+ "logps/rejected": -493.15631103515625,
829
+ "loss": 0.4989,
830
+ "rewards/accuracies": 0.7916666269302368,
831
+ "rewards/chosen": -143.0798797607422,
832
+ "rewards/margins": 111.36392974853516,
833
+ "rewards/rejected": -254.4438018798828,
834
+ "step": 540
835
+ },
836
+ {
837
+ "epoch": 0.5397448478900884,
838
+ "grad_norm": 35.50341902811831,
839
+ "learning_rate": 2.5899116932879534e-07,
840
+ "logits/chosen": 5951.2255859375,
841
+ "logits/rejected": 5129.73291015625,
842
+ "logps/chosen": -436.9695739746094,
843
+ "logps/rejected": -521.4527587890625,
844
+ "loss": 0.4679,
845
+ "rewards/accuracies": 0.7166666984558105,
846
+ "rewards/chosen": -170.31277465820312,
847
+ "rewards/margins": 103.4649658203125,
848
+ "rewards/rejected": -273.77777099609375,
849
+ "step": 550
850
+ },
851
+ {
852
+ "epoch": 0.549558390578999,
853
+ "grad_norm": 38.923123039929806,
854
+ "learning_rate": 2.504282430639594e-07,
855
+ "logits/chosen": 5168.88427734375,
856
+ "logits/rejected": 4690.22412109375,
857
+ "logps/chosen": -454.4593811035156,
858
+ "logps/rejected": -523.0364990234375,
859
+ "loss": 0.5234,
860
+ "rewards/accuracies": 0.6500000357627869,
861
+ "rewards/chosen": -206.7684783935547,
862
+ "rewards/margins": 83.95912170410156,
863
+ "rewards/rejected": -290.72760009765625,
864
+ "step": 560
865
+ },
866
+ {
867
+ "epoch": 0.5593719332679097,
868
+ "grad_norm": 35.9015213923173,
869
+ "learning_rate": 2.418648142148056e-07,
870
+ "logits/chosen": 5650.38818359375,
871
+ "logits/rejected": 4686.87158203125,
872
+ "logps/chosen": -421.58416748046875,
873
+ "logps/rejected": -519.3839721679688,
874
+ "loss": 0.4912,
875
+ "rewards/accuracies": 0.783333420753479,
876
+ "rewards/chosen": -161.98081970214844,
877
+ "rewards/margins": 115.42814636230469,
878
+ "rewards/rejected": -277.4089660644531,
879
+ "step": 570
880
+ },
881
+ {
882
+ "epoch": 0.5691854759568205,
883
+ "grad_norm": 35.377631368601875,
884
+ "learning_rate": 2.3331093278659906e-07,
885
+ "logits/chosen": 6001.3486328125,
886
+ "logits/rejected": 5075.9619140625,
887
+ "logps/chosen": -444.90069580078125,
888
+ "logps/rejected": -534.0222778320312,
889
+ "loss": 0.4834,
890
+ "rewards/accuracies": 0.73333340883255,
891
+ "rewards/chosen": -150.86428833007812,
892
+ "rewards/margins": 100.94398498535156,
893
+ "rewards/rejected": -251.80825805664062,
894
+ "step": 580
895
+ },
896
+ {
897
+ "epoch": 0.5789990186457311,
898
+ "grad_norm": 33.72847930978894,
899
+ "learning_rate": 2.247766375797906e-07,
900
+ "logits/chosen": 6150.4951171875,
901
+ "logits/rejected": 5650.3603515625,
902
+ "logps/chosen": -447.9390563964844,
903
+ "logps/rejected": -580.2978515625,
904
+ "loss": 0.459,
905
+ "rewards/accuracies": 0.800000011920929,
906
+ "rewards/chosen": -173.01307678222656,
907
+ "rewards/margins": 128.7034454345703,
908
+ "rewards/rejected": -301.71649169921875,
909
+ "step": 590
910
+ },
911
+ {
912
+ "epoch": 0.5888125613346418,
913
+ "grad_norm": 38.272687769078246,
914
+ "learning_rate": 2.1627194440852142e-07,
915
+ "logits/chosen": 5934.83935546875,
916
+ "logits/rejected": 5138.47705078125,
917
+ "logps/chosen": -510.39532470703125,
918
+ "logps/rejected": -600.4871826171875,
919
+ "loss": 0.516,
920
+ "rewards/accuracies": 0.7583333849906921,
921
+ "rewards/chosen": -226.30844116210938,
922
+ "rewards/margins": 95.1800765991211,
923
+ "rewards/rejected": -321.4884948730469,
924
+ "step": 600
925
+ },
926
+ {
927
+ "epoch": 0.5986261040235525,
928
+ "grad_norm": 42.72545572301978,
929
+ "learning_rate": 2.0780683434610413e-07,
930
+ "logits/chosen": 5760.5244140625,
931
+ "logits/rejected": 4755.18798828125,
932
+ "logps/chosen": -520.7589721679688,
933
+ "logps/rejected": -605.10546875,
934
+ "loss": 0.4979,
935
+ "rewards/accuracies": 0.7833333611488342,
936
+ "rewards/chosen": -230.55068969726562,
937
+ "rewards/margins": 103.2475357055664,
938
+ "rewards/rejected": -333.7981872558594,
939
+ "step": 610
940
+ },
941
+ {
942
+ "epoch": 0.6084396467124632,
943
+ "grad_norm": 30.800890611965162,
944
+ "learning_rate": 1.993912420112756e-07,
945
+ "logits/chosen": 6323.02978515625,
946
+ "logits/rejected": 5290.75927734375,
947
+ "logps/chosen": -529.4403686523438,
948
+ "logps/rejected": -628.4583129882812,
949
+ "loss": 0.5034,
950
+ "rewards/accuracies": 0.75,
951
+ "rewards/chosen": -250.78018188476562,
952
+ "rewards/margins": 104.14213562011719,
953
+ "rewards/rejected": -354.92236328125,
954
+ "step": 620
955
+ },
956
+ {
957
+ "epoch": 0.6182531894013739,
958
+ "grad_norm": 33.05143266331657,
959
+ "learning_rate": 1.9103504390896944e-07,
960
+ "logits/chosen": 6340.01025390625,
961
+ "logits/rejected": 5427.24755859375,
962
+ "logps/chosen": -559.9760131835938,
963
+ "logps/rejected": -633.686767578125,
964
+ "loss": 0.4884,
965
+ "rewards/accuracies": 0.7583334445953369,
966
+ "rewards/chosen": -264.83856201171875,
967
+ "rewards/margins": 85.63264465332031,
968
+ "rewards/rejected": -350.47125244140625,
969
+ "step": 630
970
+ },
971
+ {
972
+ "epoch": 0.6280667320902846,
973
+ "grad_norm": 39.56952674823438,
974
+ "learning_rate": 1.8274804683928913e-07,
975
+ "logits/chosen": 5424.0146484375,
976
+ "logits/rejected": 4903.7958984375,
977
+ "logps/chosen": -535.6927490234375,
978
+ "logps/rejected": -647.5748901367188,
979
+ "loss": 0.4892,
980
+ "rewards/accuracies": 0.7583333253860474,
981
+ "rewards/chosen": -269.99102783203125,
982
+ "rewards/margins": 118.67204284667969,
983
+ "rewards/rejected": -388.6630554199219,
984
+ "step": 640
985
+ },
986
+ {
987
+ "epoch": 0.6378802747791953,
988
+ "grad_norm": 55.248010597812424,
989
+ "learning_rate": 1.745399763882881e-07,
990
+ "logits/chosen": 5793.76953125,
991
+ "logits/rejected": 4353.64794921875,
992
+ "logps/chosen": -535.369140625,
993
+ "logps/rejected": -589.8530883789062,
994
+ "loss": 0.4828,
995
+ "rewards/accuracies": 0.7583333253860474,
996
+ "rewards/chosen": -241.2775421142578,
997
+ "rewards/margins": 122.86383056640625,
998
+ "rewards/rejected": -364.1413879394531,
999
+ "step": 650
1000
+ },
1001
+ {
1002
+ "epoch": 0.647693817468106,
1003
+ "grad_norm": 38.643520028392174,
1004
+ "learning_rate": 1.664204655140607e-07,
1005
+ "logits/chosen": 6159.14306640625,
1006
+ "logits/rejected": 4976.43994140625,
1007
+ "logps/chosen": -499.28851318359375,
1008
+ "logps/rejected": -561.6052856445312,
1009
+ "loss": 0.495,
1010
+ "rewards/accuracies": 0.7166666984558105,
1011
+ "rewards/chosen": -212.77145385742188,
1012
+ "rewards/margins": 93.96333312988281,
1013
+ "rewards/rejected": -306.73480224609375,
1014
+ "step": 660
1015
+ },
1016
+ {
1017
+ "epoch": 0.6575073601570167,
1018
+ "grad_norm": 35.07622366892728,
1019
+ "learning_rate": 1.5839904324154273e-07,
1020
+ "logits/chosen": 5574.2802734375,
1021
+ "logits/rejected": 4987.9404296875,
1022
+ "logps/chosen": -466.86346435546875,
1023
+ "logps/rejected": -580.9351196289062,
1024
+ "loss": 0.4938,
1025
+ "rewards/accuracies": 0.7583333253860474,
1026
+ "rewards/chosen": -179.73626708984375,
1027
+ "rewards/margins": 136.65802001953125,
1028
+ "rewards/rejected": -316.3943176269531,
1029
+ "step": 670
1030
+ },
1031
+ {
1032
+ "epoch": 0.6673209028459274,
1033
+ "grad_norm": 33.542518567077636,
1034
+ "learning_rate": 1.5048512347928564e-07,
1035
+ "logits/chosen": 6700.78515625,
1036
+ "logits/rejected": 5496.53662109375,
1037
+ "logps/chosen": -503.79290771484375,
1038
+ "logps/rejected": -590.5035400390625,
1039
+ "loss": 0.4429,
1040
+ "rewards/accuracies": 0.7499999403953552,
1041
+ "rewards/chosen": -193.58926391601562,
1042
+ "rewards/margins": 135.96713256835938,
1043
+ "rewards/rejected": -329.556396484375,
1044
+ "step": 680
1045
+ },
1046
+ {
1047
+ "epoch": 0.677134445534838,
1048
+ "grad_norm": 34.78474391764019,
1049
+ "learning_rate": 1.426879939713322e-07,
1050
+ "logits/chosen": 5514.447265625,
1051
+ "logits/rejected": 4842.81640625,
1052
+ "logps/chosen": -472.7972717285156,
1053
+ "logps/rejected": -572.2882690429688,
1054
+ "loss": 0.5124,
1055
+ "rewards/accuracies": 0.7333332896232605,
1056
+ "rewards/chosen": -220.92953491210938,
1057
+ "rewards/margins": 110.0331039428711,
1058
+ "rewards/rejected": -330.9626159667969,
1059
+ "step": 690
1060
+ },
1061
+ {
1062
+ "epoch": 0.6869479882237488,
1063
+ "grad_norm": 39.067983682803174,
1064
+ "learning_rate": 1.350168053971577e-07,
1065
+ "logits/chosen": 5970.7685546875,
1066
+ "logits/rejected": 5311.5283203125,
1067
+ "logps/chosen": -452.698974609375,
1068
+ "logps/rejected": -518.3038330078125,
1069
+ "loss": 0.4982,
1070
+ "rewards/accuracies": 0.7500000596046448,
1071
+ "rewards/chosen": -172.0897979736328,
1072
+ "rewards/margins": 97.96910095214844,
1073
+ "rewards/rejected": -270.05889892578125,
1074
+ "step": 700
1075
+ },
1076
+ {
1077
+ "epoch": 0.6967615309126595,
1078
+ "grad_norm": 40.38105094076238,
1079
+ "learning_rate": 1.2748056063246994e-07,
1080
+ "logits/chosen": 5575.70458984375,
1081
+ "logits/rejected": 5063.31884765625,
1082
+ "logps/chosen": -460.80413818359375,
1083
+ "logps/rejected": -541.817138671875,
1084
+ "loss": 0.5068,
1085
+ "rewards/accuracies": 0.824999988079071,
1086
+ "rewards/chosen": -161.07498168945312,
1087
+ "rewards/margins": 101.1529541015625,
1088
+ "rewards/rejected": -262.2279357910156,
1089
+ "step": 710
1090
+ },
1091
+ {
1092
+ "epoch": 0.7065750736015701,
1093
+ "grad_norm": 34.54330420809715,
1094
+ "learning_rate": 1.2008810418347093e-07,
1095
+ "logits/chosen": 5857.0908203125,
1096
+ "logits/rejected": 5070.1689453125,
1097
+ "logps/chosen": -448.393798828125,
1098
+ "logps/rejected": -511.27130126953125,
1099
+ "loss": 0.4955,
1100
+ "rewards/accuracies": 0.7750000357627869,
1101
+ "rewards/chosen": -160.17967224121094,
1102
+ "rewards/margins": 94.85597229003906,
1103
+ "rewards/rejected": -255.03564453125,
1104
+ "step": 720
1105
+ },
1106
+ {
1107
+ "epoch": 0.7163886162904809,
1108
+ "grad_norm": 36.199421295115016,
1109
+ "learning_rate": 1.128481118069799e-07,
1110
+ "logits/chosen": 5848.61279296875,
1111
+ "logits/rejected": 4546.04296875,
1112
+ "logps/chosen": -461.7185974121094,
1113
+ "logps/rejected": -540.6113891601562,
1114
+ "loss": 0.4906,
1115
+ "rewards/accuracies": 0.6916666626930237,
1116
+ "rewards/chosen": -193.16610717773438,
1117
+ "rewards/margins": 106.0004653930664,
1118
+ "rewards/rejected": -299.16656494140625,
1119
+ "step": 730
1120
+ },
1121
+ {
1122
+ "epoch": 0.7262021589793916,
1123
+ "grad_norm": 58.298464182056584,
1124
+ "learning_rate": 1.0576908032860088e-07,
1125
+ "logits/chosen": 5177.734375,
1126
+ "logits/rejected": 4254.4931640625,
1127
+ "logps/chosen": -439.21923828125,
1128
+ "logps/rejected": -490.22210693359375,
1129
+ "loss": 0.4902,
1130
+ "rewards/accuracies": 0.73333340883255,
1131
+ "rewards/chosen": -179.5984344482422,
1132
+ "rewards/margins": 106.27181243896484,
1133
+ "rewards/rejected": -285.8702697753906,
1134
+ "step": 740
1135
+ },
1136
+ {
1137
+ "epoch": 0.7360157016683022,
1138
+ "grad_norm": 47.06791612169973,
1139
+ "learning_rate": 9.88593176708827e-08,
1140
+ "logits/chosen": 5833.16748046875,
1141
+ "logits/rejected": 4599.1416015625,
1142
+ "logps/chosen": -447.70770263671875,
1143
+ "logps/rejected": -503.156005859375,
1144
+ "loss": 0.4893,
1145
+ "rewards/accuracies": 0.7250000238418579,
1146
+ "rewards/chosen": -168.36907958984375,
1147
+ "rewards/margins": 97.07537078857422,
1148
+ "rewards/rejected": -265.4444580078125,
1149
+ "step": 750
1150
+ },
1151
+ {
1152
+ "epoch": 0.745829244357213,
1153
+ "grad_norm": 37.1160086085217,
1154
+ "learning_rate": 9.212693310317479e-08,
1155
+ "logits/chosen": 5141.75390625,
1156
+ "logits/rejected": 4296.54833984375,
1157
+ "logps/chosen": -440.88067626953125,
1158
+ "logps/rejected": -532.016845703125,
1159
+ "loss": 0.509,
1160
+ "rewards/accuracies": 0.7000000476837158,
1161
+ "rewards/chosen": -181.9151153564453,
1162
+ "rewards/margins": 105.42694091796875,
1163
+ "rewards/rejected": -287.34210205078125,
1164
+ "step": 760
1165
+ },
1166
+ {
1167
+ "epoch": 0.7556427870461236,
1168
+ "grad_norm": 43.097373457610466,
1169
+ "learning_rate": 8.557982772462138e-08,
1170
+ "logits/chosen": 5532.06689453125,
1171
+ "logits/rejected": 4944.3828125,
1172
+ "logps/chosen": -424.0889587402344,
1173
+ "logps/rejected": -537.22802734375,
1174
+ "loss": 0.4679,
1175
+ "rewards/accuracies": 0.8083333969116211,
1176
+ "rewards/chosen": -165.0626983642578,
1177
+ "rewards/margins": 116.6352767944336,
1178
+ "rewards/rejected": -281.6979675292969,
1179
+ "step": 770
1180
+ },
1181
+ {
1182
+ "epoch": 0.7654563297350343,
1183
+ "grad_norm": 51.31695327547084,
1184
+ "learning_rate": 7.922568519146425e-08,
1185
+ "logits/chosen": 5383.9931640625,
1186
+ "logits/rejected": 4821.4970703125,
1187
+ "logps/chosen": -442.91583251953125,
1188
+ "logps/rejected": -547.6976928710938,
1189
+ "loss": 0.4878,
1190
+ "rewards/accuracies": 0.6833333969116211,
1191
+ "rewards/chosen": -190.0575714111328,
1192
+ "rewards/margins": 98.0093002319336,
1193
+ "rewards/rejected": -288.06683349609375,
1194
+ "step": 780
1195
+ },
1196
+ {
1197
+ "epoch": 0.7752698724239451,
1198
+ "grad_norm": 40.87283215033227,
1199
+ "learning_rate": 7.307196269953444e-08,
1200
+ "logits/chosen": 5953.62646484375,
1201
+ "logits/rejected": 4360.71435546875,
1202
+ "logps/chosen": -468.15301513671875,
1203
+ "logps/rejected": -554.8399658203125,
1204
+ "loss": 0.4513,
1205
+ "rewards/accuracies": 0.7500000596046448,
1206
+ "rewards/chosen": -173.2440185546875,
1207
+ "rewards/margins": 138.4357147216797,
1208
+ "rewards/rejected": -311.67974853515625,
1209
+ "step": 790
1210
+ },
1211
+ {
1212
+ "epoch": 0.7850834151128557,
1213
+ "grad_norm": 53.01816227936955,
1214
+ "learning_rate": 6.712588223251809e-08,
1215
+ "logits/chosen": 5890.1064453125,
1216
+ "logits/rejected": 5068.29052734375,
1217
+ "logps/chosen": -507.1546936035156,
1218
+ "logps/rejected": -587.9667358398438,
1219
+ "loss": 0.4932,
1220
+ "rewards/accuracies": 0.6666666865348816,
1221
+ "rewards/chosen": -197.91702270507812,
1222
+ "rewards/margins": 104.29020690917969,
1223
+ "rewards/rejected": -302.20721435546875,
1224
+ "step": 800
1225
+ },
1226
+ {
1227
+ "epoch": 0.7948969578017664,
1228
+ "grad_norm": 39.36718486541899,
1229
+ "learning_rate": 6.139442208626517e-08,
1230
+ "logits/chosen": 5642.1572265625,
1231
+ "logits/rejected": 5064.44140625,
1232
+ "logps/chosen": -466.017822265625,
1233
+ "logps/rejected": -542.1941528320312,
1234
+ "loss": 0.5086,
1235
+ "rewards/accuracies": 0.7416666746139526,
1236
+ "rewards/chosen": -191.46595764160156,
1237
+ "rewards/margins": 93.6746597290039,
1238
+ "rewards/rejected": -285.140625,
1239
+ "step": 810
1240
+ },
1241
+ {
1242
+ "epoch": 0.8047105004906772,
1243
+ "grad_norm": 32.25150411325172,
1244
+ "learning_rate": 5.5884308679090525e-08,
1245
+ "logits/chosen": 6617.20166015625,
1246
+ "logits/rejected": 5841.89990234375,
1247
+ "logps/chosen": -489.13140869140625,
1248
+ "logps/rejected": -556.3676147460938,
1249
+ "loss": 0.4687,
1250
+ "rewards/accuracies": 0.7666667103767395,
1251
+ "rewards/chosen": -174.83181762695312,
1252
+ "rewards/margins": 96.47708129882812,
1253
+ "rewards/rejected": -271.30889892578125,
1254
+ "step": 820
1255
+ },
1256
+ {
1257
+ "epoch": 0.8145240431795878,
1258
+ "grad_norm": 42.66355926716236,
1259
+ "learning_rate": 5.060200865767605e-08,
1260
+ "logits/chosen": 5482.3115234375,
1261
+ "logits/rejected": 4349.36181640625,
1262
+ "logps/chosen": -489.5411071777344,
1263
+ "logps/rejected": -519.50439453125,
1264
+ "loss": 0.4808,
1265
+ "rewards/accuracies": 0.7250000238418579,
1266
+ "rewards/chosen": -183.46530151367188,
1267
+ "rewards/margins": 100.88998413085938,
1268
+ "rewards/rejected": -284.3552551269531,
1269
+ "step": 830
1270
+ },
1271
+ {
1272
+ "epoch": 0.8243375858684985,
1273
+ "grad_norm": 32.60506546982286,
1274
+ "learning_rate": 4.555372130784102e-08,
1275
+ "logits/chosen": 6099.6806640625,
1276
+ "logits/rejected": 5423.52294921875,
1277
+ "logps/chosen": -430.18377685546875,
1278
+ "logps/rejected": -559.9306640625,
1279
+ "loss": 0.4656,
1280
+ "rewards/accuracies": 0.7583333253860474,
1281
+ "rewards/chosen": -174.9810028076172,
1282
+ "rewards/margins": 115.120361328125,
1283
+ "rewards/rejected": -290.10137939453125,
1284
+ "step": 840
1285
+ },
1286
+ {
1287
+ "epoch": 0.8341511285574092,
1288
+ "grad_norm": 28.976991791091827,
1289
+ "learning_rate": 4.0745371279084976e-08,
1290
+ "logits/chosen": 6144.4287109375,
1291
+ "logits/rejected": 5391.69189453125,
1292
+ "logps/chosen": -456.33270263671875,
1293
+ "logps/rejected": -532.3670654296875,
1294
+ "loss": 0.5206,
1295
+ "rewards/accuracies": 0.7583333253860474,
1296
+ "rewards/chosen": -177.6209716796875,
1297
+ "rewards/margins": 89.14666748046875,
1298
+ "rewards/rejected": -266.76763916015625,
1299
+ "step": 850
1300
+ },
1301
+ {
1302
+ "epoch": 0.8439646712463199,
1303
+ "grad_norm": 35.3584148587086,
1304
+ "learning_rate": 3.6182601631443596e-08,
1305
+ "logits/chosen": 6054.46142578125,
1306
+ "logits/rejected": 5496.1396484375,
1307
+ "logps/chosen": -492.4789123535156,
1308
+ "logps/rejected": -586.8856811523438,
1309
+ "loss": 0.462,
1310
+ "rewards/accuracies": 0.7916666269302368,
1311
+ "rewards/chosen": -178.38107299804688,
1312
+ "rewards/margins": 115.93116760253906,
1313
+ "rewards/rejected": -294.312255859375,
1314
+ "step": 860
1315
+ },
1316
+ {
1317
+ "epoch": 0.8537782139352306,
1318
+ "grad_norm": 34.93609119738404,
1319
+ "learning_rate": 3.187076721281595e-08,
1320
+ "logits/chosen": 5244.7314453125,
1321
+ "logits/rejected": 4227.8193359375,
1322
+ "logps/chosen": -435.866943359375,
1323
+ "logps/rejected": -531.3182983398438,
1324
+ "loss": 0.4827,
1325
+ "rewards/accuracies": 0.7416667342185974,
1326
+ "rewards/chosen": -182.42454528808594,
1327
+ "rewards/margins": 120.93087005615234,
1328
+ "rewards/rejected": -303.35540771484375,
1329
+ "step": 870
1330
+ },
1331
+ {
1332
+ "epoch": 0.8635917566241413,
1333
+ "grad_norm": 42.21210418756789,
1334
+ "learning_rate": 2.7814928374537334e-08,
1335
+ "logits/chosen": 6968.44384765625,
1336
+ "logits/rejected": 5644.8955078125,
1337
+ "logps/chosen": -539.173828125,
1338
+ "logps/rejected": -613.7080078125,
1339
+ "loss": 0.4725,
1340
+ "rewards/accuracies": 0.7916666865348816,
1341
+ "rewards/chosen": -196.6251220703125,
1342
+ "rewards/margins": 135.3355255126953,
1343
+ "rewards/rejected": -331.96063232421875,
1344
+ "step": 880
1345
+ },
1346
+ {
1347
+ "epoch": 0.873405299313052,
1348
+ "grad_norm": 48.62475030995162,
1349
+ "learning_rate": 2.4019845032570875e-08,
1350
+ "logits/chosen": 6289.82763671875,
1351
+ "logits/rejected": 4878.1728515625,
1352
+ "logps/chosen": -469.8004455566406,
1353
+ "logps/rejected": -565.7530517578125,
1354
+ "loss": 0.4788,
1355
+ "rewards/accuracies": 0.8166667222976685,
1356
+ "rewards/chosen": -185.22061157226562,
1357
+ "rewards/margins": 133.4977264404297,
1358
+ "rewards/rejected": -318.7183532714844,
1359
+ "step": 890
1360
+ },
1361
+ {
1362
+ "epoch": 0.8832188420019627,
1363
+ "grad_norm": 35.719224006833315,
1364
+ "learning_rate": 2.0489971081290193e-08,
1365
+ "logits/chosen": 5738.51318359375,
1366
+ "logits/rejected": 4603.50439453125,
1367
+ "logps/chosen": -483.54791259765625,
1368
+ "logps/rejected": -549.1290283203125,
1369
+ "loss": 0.4808,
1370
+ "rewards/accuracies": 0.7249999642372131,
1371
+ "rewards/chosen": -206.58328247070312,
1372
+ "rewards/margins": 97.61729431152344,
1373
+ "rewards/rejected": -304.2005920410156,
1374
+ "step": 900
1375
+ },
1376
+ {
1377
+ "epoch": 0.8930323846908734,
1378
+ "grad_norm": 43.132524531606194,
1379
+ "learning_rate": 1.7229449166406477e-08,
1380
+ "logits/chosen": 5693.8486328125,
1381
+ "logits/rejected": 4534.4052734375,
1382
+ "logps/chosen": -469.5682067871094,
1383
+ "logps/rejected": -569.1848754882812,
1384
+ "loss": 0.4531,
1385
+ "rewards/accuracies": 0.8083332777023315,
1386
+ "rewards/chosen": -204.99484252929688,
1387
+ "rewards/margins": 132.3409881591797,
1388
+ "rewards/rejected": -337.3358154296875,
1389
+ "step": 910
1390
+ },
1391
+ {
1392
+ "epoch": 0.9028459273797841,
1393
+ "grad_norm": 34.73048158998948,
1394
+ "learning_rate": 1.4242105823176837e-08,
1395
+ "logits/chosen": 6962.6904296875,
1396
+ "logits/rejected": 5748.6943359375,
1397
+ "logps/chosen": -525.45068359375,
1398
+ "logps/rejected": -564.1856689453125,
1399
+ "loss": 0.4516,
1400
+ "rewards/accuracies": 0.7250000238418579,
1401
+ "rewards/chosen": -192.100830078125,
1402
+ "rewards/margins": 99.60045623779297,
1403
+ "rewards/rejected": -291.7012939453125,
1404
+ "step": 920
1405
+ },
1406
+ {
1407
+ "epoch": 0.9126594700686947,
1408
+ "grad_norm": 34.47893895251098,
1409
+ "learning_rate": 1.1531446985597604e-08,
1410
+ "logits/chosen": 5990.88525390625,
1411
+ "logits/rejected": 5583.560546875,
1412
+ "logps/chosen": -485.2509765625,
1413
+ "logps/rejected": -572.419921875,
1414
+ "loss": 0.4586,
1415
+ "rewards/accuracies": 0.8250001072883606,
1416
+ "rewards/chosen": -192.66342163085938,
1417
+ "rewards/margins": 110.31678771972656,
1418
+ "rewards/rejected": -302.9801940917969,
1419
+ "step": 930
1420
+ },
1421
+ {
1422
+ "epoch": 0.9224730127576055,
1423
+ "grad_norm": 38.65102653124819,
1424
+ "learning_rate": 9.100653871854963e-09,
1425
+ "logits/chosen": 5348.1103515625,
1426
+ "logits/rejected": 4875.837890625,
1427
+ "logps/chosen": -461.71697998046875,
1428
+ "logps/rejected": -564.880126953125,
1429
+ "loss": 0.4878,
1430
+ "rewards/accuracies": 0.7583333253860474,
1431
+ "rewards/chosen": -193.52760314941406,
1432
+ "rewards/margins": 111.4560546875,
1433
+ "rewards/rejected": -304.98358154296875,
1434
+ "step": 940
1435
+ },
1436
+ {
1437
+ "epoch": 0.9322865554465162,
1438
+ "grad_norm": 35.36010410132843,
1439
+ "learning_rate": 6.9525792508597634e-09,
1440
+ "logits/chosen": 5099.234375,
1441
+ "logits/rejected": 4961.53466796875,
1442
+ "logps/chosen": -456.7210388183594,
1443
+ "logps/rejected": -571.4191284179688,
1444
+ "loss": 0.496,
1445
+ "rewards/accuracies": 0.7500000596046448,
1446
+ "rewards/chosen": -211.7642364501953,
1447
+ "rewards/margins": 97.53651428222656,
1448
+ "rewards/rejected": -309.30072021484375,
1449
+ "step": 950
1450
+ },
1451
+ {
1452
+ "epoch": 0.9421000981354269,
1453
+ "grad_norm": 60.3061901160388,
1454
+ "learning_rate": 5.089744094249837e-09,
1455
+ "logits/chosen": 6198.19091796875,
1456
+ "logits/rejected": 5164.39013671875,
1457
+ "logps/chosen": -477.7798767089844,
1458
+ "logps/rejected": -606.0765991210938,
1459
+ "loss": 0.4522,
1460
+ "rewards/accuracies": 0.8500000238418579,
1461
+ "rewards/chosen": -207.751953125,
1462
+ "rewards/margins": 137.81539916992188,
1463
+ "rewards/rejected": -345.56732177734375,
1464
+ "step": 960
1465
+ },
1466
+ {
1467
+ "epoch": 0.9519136408243376,
1468
+ "grad_norm": 38.92788774449534,
1469
+ "learning_rate": 3.5143346177878565e-09,
1470
+ "logits/chosen": 6070.90673828125,
1471
+ "logits/rejected": 5626.92578125,
1472
+ "logps/chosen": -508.4833068847656,
1473
+ "logps/rejected": -613.7086791992188,
1474
+ "loss": 0.489,
1475
+ "rewards/accuracies": 0.7750000357627869,
1476
+ "rewards/chosen": -202.98733520507812,
1477
+ "rewards/margins": 117.0799560546875,
1478
+ "rewards/rejected": -320.0672912597656,
1479
+ "step": 970
1480
+ },
1481
+ {
1482
+ "epoch": 0.9617271835132483,
1483
+ "grad_norm": 54.09136710237634,
1484
+ "learning_rate": 2.2281997156273213e-09,
1485
+ "logits/chosen": 6383.44775390625,
1486
+ "logits/rejected": 5800.46484375,
1487
+ "logps/chosen": -531.06884765625,
1488
+ "logps/rejected": -637.2273559570312,
1489
+ "loss": 0.5199,
1490
+ "rewards/accuracies": 0.7500000596046448,
1491
+ "rewards/chosen": -214.9575958251953,
1492
+ "rewards/margins": 97.85444641113281,
1493
+ "rewards/rejected": -312.81207275390625,
1494
+ "step": 980
1495
+ },
1496
+ {
1497
+ "epoch": 0.971540726202159,
1498
+ "grad_norm": 37.58754664399141,
1499
+ "learning_rate": 1.2328487904580131e-09,
1500
+ "logits/chosen": 5965.31982421875,
1501
+ "logits/rejected": 4487.17431640625,
1502
+ "logps/chosen": -527.6492919921875,
1503
+ "logps/rejected": -595.473876953125,
1504
+ "loss": 0.4605,
1505
+ "rewards/accuracies": 0.7833333611488342,
1506
+ "rewards/chosen": -219.84896850585938,
1507
+ "rewards/margins": 120.89280700683594,
1508
+ "rewards/rejected": -340.7417907714844,
1509
+ "step": 990
1510
+ },
1511
+ {
1512
+ "epoch": 0.9813542688910697,
1513
+ "grad_norm": 29.779875569619673,
1514
+ "learning_rate": 5.29449982077046e-10,
1515
+ "logits/chosen": 5757.50439453125,
1516
+ "logits/rejected": 5476.619140625,
1517
+ "logps/chosen": -460.2972106933594,
1518
+ "logps/rejected": -551.8410034179688,
1519
+ "loss": 0.4694,
1520
+ "rewards/accuracies": 0.7249999642372131,
1521
+ "rewards/chosen": -186.51734924316406,
1522
+ "rewards/margins": 90.38455963134766,
1523
+ "rewards/rejected": -276.90191650390625,
1524
+ "step": 1000
1525
+ },
1526
+ {
1527
+ "epoch": 0.9911678115799804,
1528
+ "grad_norm": 27.069636754429258,
1529
+ "learning_rate": 1.1882879646485379e-10,
1530
+ "logits/chosen": 6565.1044921875,
1531
+ "logits/rejected": 5212.9794921875,
1532
+ "logps/chosen": -543.57421875,
1533
+ "logps/rejected": -614.15185546875,
1534
+ "loss": 0.4409,
1535
+ "rewards/accuracies": 0.7416666746139526,
1536
+ "rewards/chosen": -200.32835388183594,
1537
+ "rewards/margins": 119.9702377319336,
1538
+ "rewards/rejected": -320.298583984375,
1539
+ "step": 1010
1540
+ },
1541
+ {
1542
+ "epoch": 1.0,
1543
+ "step": 1019,
1544
+ "total_flos": 0.0,
1545
+ "train_loss": 0.5212765811586988,
1546
+ "train_runtime": 13234.9919,
1547
+ "train_samples_per_second": 4.619,
1548
+ "train_steps_per_second": 0.077
1549
+ }
1550
+ ],
1551
+ "logging_steps": 10,
1552
+ "max_steps": 1019,
1553
+ "num_input_tokens_seen": 0,
1554
+ "num_train_epochs": 1,
1555
+ "save_steps": 1000,
1556
+ "stateful_callbacks": {
1557
+ "TrainerControl": {
1558
+ "args": {
1559
+ "should_epoch_stop": false,
1560
+ "should_evaluate": false,
1561
+ "should_log": false,
1562
+ "should_save": true,
1563
+ "should_training_stop": false
1564
+ },
1565
+ "attributes": {}
1566
+ }
1567
+ },
1568
+ "total_flos": 0.0,
1569
+ "train_batch_size": 6,
1570
+ "trial_name": null,
1571
+ "trial_params": null
1572
+ }