jssky commited on
Commit
152e2a2
·
verified ·
1 Parent(s): 2dec5cb

Training in progress, step 9, checkpoint

Browse files
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48b347053b06b3c619535ebc10f28a094fccd287e10c86393ad57e0922ee6235
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f037179e2f72f8052c567935428a2b336875a279f6a258c94c81c6eda52ca9a7
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6625b271af64d78b0cb21cd2181dec43ed26cbdaf7c144448da19978546a31f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:095e7bf64776870b00aad21b5d20fedc85a0ea0239296af560cd4be957fbf562
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33fa0c08b85797649036972335a8cd73b92d730982f9c6331cf9c606a49c949f
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c7ea82e6f493a6775b5a4cd8618f2ddb43db4405ab77c46f046a09ea763e869
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33597b58fea822d53100c04ad5bf6dac139f8ee7ebeccc1eeff0d0f0c5705911
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d74456313175ad9b8126678aa9443bfe6356fb4c5d663a059450d3f27587e784
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6e8dc2cb5ce182cb23e477a3c983eadfda3e659ec882f4a3cc3deb0891b781e
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14afc2d953ce2b165f4da7b4bca19afec155ff1d5805a54379b64f4c34bfd6a9
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b680b229241711413bb5dfcac9eefb7d9456a473f950e8a7e515db460a59ca55
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed57cfed487b9d17510809e80ef03db1018bf9797a8332cd83d8598a8bf8276d
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bae373d23ff7545032b25d9d95aaecf06cb3497437725166582db64db01deef4
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9ca5a0af0b68b10972e265eafc5246b223cb21b68105b011c346e7cb46fee64
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:734648406d7d04253f7d286c1e4582bd2c99e8fd94850aee383e98ceb2e73b85
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8061970669cd0d8e3da8a8bb44cab4dd909f46d3ec4e8db91363c4c8a1e4dc2
3
  size 15984
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.00967741935483871,
5
  "eval_steps": 3,
6
- "global_step": 6,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -73,6 +73,35 @@
73
  "eval_samples_per_second": 43.73,
74
  "eval_steps_per_second": 5.487,
75
  "step": 6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  }
77
  ],
78
  "logging_steps": 1,
@@ -92,7 +121,7 @@
92
  "attributes": {}
93
  }
94
  },
95
- "total_flos": 1.770634359078912e+16,
96
  "train_batch_size": 1,
97
  "trial_name": null,
98
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.014516129032258065,
5
  "eval_steps": 3,
6
+ "global_step": 9,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
73
  "eval_samples_per_second": 43.73,
74
  "eval_steps_per_second": 5.487,
75
  "step": 6
76
+ },
77
+ {
78
+ "epoch": 0.01129032258064516,
79
+ "grad_norm": NaN,
80
+ "learning_rate": 0.0,
81
+ "loss": 9.2474,
82
+ "step": 7
83
+ },
84
+ {
85
+ "epoch": 0.012903225806451613,
86
+ "grad_norm": NaN,
87
+ "learning_rate": 0.0,
88
+ "loss": 9.5729,
89
+ "step": 8
90
+ },
91
+ {
92
+ "epoch": 0.014516129032258065,
93
+ "grad_norm": NaN,
94
+ "learning_rate": 0.0,
95
+ "loss": 9.5663,
96
+ "step": 9
97
+ },
98
+ {
99
+ "epoch": 0.014516129032258065,
100
+ "eval_loss": 9.502001762390137,
101
+ "eval_runtime": 23.8764,
102
+ "eval_samples_per_second": 43.725,
103
+ "eval_steps_per_second": 5.487,
104
+ "step": 9
105
  }
106
  ],
107
  "logging_steps": 1,
 
121
  "attributes": {}
122
  }
123
  },
124
+ "total_flos": 2.655951538618368e+16,
125
  "train_batch_size": 1,
126
  "trial_name": null,
127
  "trial_params": null