beamaia committed (verified)
Commit 50dff7f · 1 Parent(s): 1223e11

Training in progress, step 100, checkpoint

checkpoint-100/adapter_config.json CHANGED
@@ -20,8 +20,8 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
     "v_proj",
+    "k_proj",
     "q_proj",
     "o_proj"
   ],
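The only change above is that "k_proj" moves after "v_proj" inside target_modules; PEFT matches module names against this list as a set, so the reorder does not change which projections receive adapters. For reference, a minimal sketch of how an adapter with these target modules could be declared, assuming peft.LoraConfig was used; r and lora_alpha are illustrative placeholders, not values taken from this checkpoint:

# Sketch only: r and lora_alpha are placeholders, not read from adapter_config.json.
from peft import LoraConfig

lora_config = LoraConfig(
    r=8,                      # placeholder rank
    lora_alpha=16,            # placeholder scaling factor
    target_modules=["v_proj", "k_proj", "q_proj", "o_proj"],  # attention projections from the diff
    task_type="CAUSAL_LM",
)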
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:93e3eccf3c7b4d2974e51d6b92a5fce3e04e1b639c3dc38faaf6f751b12e54d4
+oid sha256:5241ed443e49e9c020cf18aba141ea2252452880579106c649b5cef461681070
 size 54560368
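adapter_model.safetensors is stored through Git LFS, so the diff only swaps the pointer's sha256 oid; the payload size is unchanged. A quick way to confirm that a locally downloaded file matches the new pointer (the path below assumes a local clone and is illustrative):

# Sketch: compare a local file's SHA-256 with the oid in the LFS pointer above.
import hashlib

EXPECTED_OID = "5241ed443e49e9c020cf18aba141ea2252452880579106c649b5cef461681070"

digest = hashlib.sha256()
with open("checkpoint-100/adapter_model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        digest.update(chunk)

print(digest.hexdigest() == EXPECTED_OID)  # True if the file matches the pointer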
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1d7dd72473bbc4ceefd500632843f8b5a46c9b458ab506c8cc5259e47189e489
+oid sha256:76a58f1d251840512383b3a097d369b8a76dc422086924bacff92444568585f1
 size 109267450
checkpoint-100/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
 {
   "best_metric": 0.35499998927116394,
-  "best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.10-KTO_Hyperparameter search, altering desired and undesired weights for KTO task.-2_max_steps-145_batch_16_2024-04-08_ppid_9/checkpoint-100",
+  "best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.10-KTO_Hyperparameter search, altering desired and undesired weights for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_9/checkpoint-100",
   "epoch": 0.684931506849315,
   "eval_steps": 50,
   "global_step": 100,
@@ -11,14 +11,14 @@
   {
     "epoch": 0.14,
     "grad_norm": 0.0,
-    "kl": 0.0,
+    "kl": 0.6947981119155884,
     "learning_rate": 0.00018,
-    "logps/chosen": -1121.033935546875,
-    "logps/rejected": -1175.0645751953125,
-    "loss": 0.3883,
-    "rewards/chosen": -82.42558288574219,
-    "rewards/margins": 3.8134756088256836,
-    "rewards/rejected": -86.0301284790039,
+    "logps/chosen": -534.4633178710938,
+    "logps/rejected": -551.6043090820312,
+    "loss": 0.3763,
+    "rewards/chosen": -24.06700325012207,
+    "rewards/margins": 3.339742660522461,
+    "rewards/rejected": -27.222640991210938,
     "step": 20
   },
   {
@@ -26,26 +26,26 @@
     "grad_norm": 0.0,
     "kl": 0.0,
     "learning_rate": 0.00015142857142857143,
-    "logps/chosen": -2772.499755859375,
-    "logps/rejected": -2552.62548828125,
-    "loss": 0.3352,
-    "rewards/chosen": -248.261474609375,
-    "rewards/margins": -23.709930419921875,
-    "rewards/rejected": -226.1365203857422,
+    "logps/chosen": -1181.132568359375,
+    "logps/rejected": -1268.249755859375,
+    "loss": 0.3703,
+    "rewards/chosen": -89.00174713134766,
+    "rewards/margins": 8.45933723449707,
+    "rewards/rejected": -97.4486083984375,
     "step": 40
   },
   {
     "epoch": 0.34,
     "eval_kl": 0.0,
-    "eval_logps/chosen": -2637.12109375,
-    "eval_logps/rejected": -2308.955810546875,
+    "eval_logps/chosen": -1383.1231689453125,
+    "eval_logps/rejected": -1366.6656494140625,
     "eval_loss": 0.35499998927116394,
-    "eval_rewards/chosen": -235.4595489501953,
-    "eval_rewards/margins": -32.44633865356445,
-    "eval_rewards/rejected": -204.63710021972656,
-    "eval_runtime": 140.6369,
-    "eval_samples_per_second": 2.133,
-    "eval_steps_per_second": 0.533,
+    "eval_rewards/chosen": -110.769287109375,
+    "eval_rewards/margins": 0.34473782777786255,
+    "eval_rewards/rejected": -110.63328552246094,
+    "eval_runtime": 139.1234,
+    "eval_samples_per_second": 2.156,
+    "eval_steps_per_second": 0.539,
     "step": 50
   },
   {
@@ -53,12 +53,12 @@
     "grad_norm": 0.0,
     "kl": 0.0,
     "learning_rate": 0.00012285714285714287,
-    "logps/chosen": -2785.7900390625,
-    "logps/rejected": -2503.834716796875,
-    "loss": 0.3516,
-    "rewards/chosen": -249.9789581298828,
-    "rewards/margins": -26.628849029541016,
-    "rewards/rejected": -222.65147399902344,
+    "logps/chosen": -1373.5347900390625,
+    "logps/rejected": -1470.8179931640625,
+    "loss": 0.3422,
+    "rewards/chosen": -108.9572525024414,
+    "rewards/margins": 11.698455810546875,
+    "rewards/rejected": -120.54151916503906,
     "step": 60
   },
   {
@@ -66,12 +66,12 @@
     "grad_norm": 0.0,
     "kl": 0.0,
     "learning_rate": 9.428571428571429e-05,
-    "logps/chosen": -2890.97216796875,
-    "logps/rejected": -2390.66064453125,
-    "loss": 0.3773,
-    "rewards/chosen": -259.9905700683594,
-    "rewards/margins": -50.821651458740234,
-    "rewards/rejected": -211.66656494140625,
+    "logps/chosen": -1384.841796875,
+    "logps/rejected": -1508.3975830078125,
+    "loss": 0.3703,
+    "rewards/chosen": -111.45362854003906,
+    "rewards/margins": 9.210695266723633,
+    "rewards/rejected": -121.47576904296875,
     "step": 80
   },
   {
@@ -79,26 +79,26 @@
     "grad_norm": 0.0,
     "kl": 0.0,
     "learning_rate": 6.571428571428571e-05,
-    "logps/chosen": -2552.4580078125,
-    "logps/rejected": -2739.7529296875,
-    "loss": 0.3609,
-    "rewards/chosen": -227.57888793945312,
-    "rewards/margins": 18.942209243774414,
-    "rewards/rejected": -243.08050537109375,
+    "logps/chosen": -1428.02587890625,
+    "logps/rejected": -1513.79248046875,
+    "loss": 0.3352,
+    "rewards/chosen": -113.59603118896484,
+    "rewards/margins": 8.474186897277832,
+    "rewards/rejected": -121.59716796875,
     "step": 100
   },
   {
     "epoch": 0.68,
     "eval_kl": 0.0,
-    "eval_logps/chosen": -2662.06640625,
-    "eval_logps/rejected": -2326.6005859375,
+    "eval_logps/chosen": -1404.312255859375,
+    "eval_logps/rejected": -1386.0108642578125,
     "eval_loss": 0.35499998927116394,
-    "eval_rewards/chosen": -237.95407104492188,
-    "eval_rewards/margins": -33.08881378173828,
-    "eval_rewards/rejected": -206.40155029296875,
-    "eval_runtime": 140.6813,
-    "eval_samples_per_second": 2.132,
-    "eval_steps_per_second": 0.533,
+    "eval_rewards/chosen": -112.88818359375,
+    "eval_rewards/margins": 0.1723746657371521,
+    "eval_rewards/rejected": -112.56780242919922,
+    "eval_runtime": 139.1821,
+    "eval_samples_per_second": 2.155,
+    "eval_steps_per_second": 0.539,
     "step": 100
   }
 ],
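The trainer_state.json hunks replace the 08-04 run's KTO log history with the 09-04 run's: training metrics (loss, kl, logps/*, rewards/*) are logged every 20 steps and eval metrics every 50. A small stdlib-only sketch for pulling those numbers back out of a local copy of the checkpoint (path assumed):

# Sketch: print the KTO metrics logged in trainer_state.json.
import json

with open("checkpoint-100/trainer_state.json") as f:
    state = json.load(f)

print("best checkpoint:", state["best_model_checkpoint"])
for entry in state["log_history"]:
    if "loss" in entry:         # training log entry
        print(f"step {entry['step']}: loss={entry['loss']:.4f}, "
              f"margin={entry['rewards/margins']:.3f}")
    elif "eval_loss" in entry:  # evaluation entry
        print(f"step {entry['step']}: eval_loss={entry['eval_loss']:.4f}, "
              f"eval_margin={entry['eval_rewards/margins']:.3f}")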
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7259373241079ee9fe031c2749e484f10fc64310d3a73a5dc994aaad8cbc23eb
+oid sha256:e4e7471c208358007e16769013c8f13853110f1e8d489247cdb5f44357b48bc7
 size 5688
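training_args.bin only changes its hash here: it is the pickled TrainingArguments object that the transformers Trainer saves next to each checkpoint, re-serialized by the new run. To see which arguments differ between runs it can be loaded back with torch; weights_only=False is needed because this is an arbitrary pickled object, so only do this with files you trust. The attribute names below are standard TrainingArguments fields, shown as an illustration:

# Sketch: inspect the pickled TrainingArguments in training_args.bin.
import torch

args = torch.load("checkpoint-100/training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.max_steps)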