Training in progress, step 60000
Browse files- README.md +4 -5
- logs/foo/bar.tb/events.out.tfevents.1726905162.1c1a426a2fee +3 -0
- logs/learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726983816.1c1a426a2fee +3 -0
- logs/learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726951701.1c1a426a2fee +3 -0
- logs/learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726983258.1c1a426a2fee +3 -0
- logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726822881.1c1a426a2fee +3 -0
- logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726854655.1c1a426a2fee +3 -0
- logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726919574.1c1a426a2fee +3 -0
- logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726951144.1c1a426a2fee +3 -0
- logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726790562.1c1a426a2fee +2 -2
- logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726822282.1c1a426a2fee +3 -0
- logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726887093.1c1a426a2fee +3 -0
- logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726918980.1c1a426a2fee +3 -0
- logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___1.0___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726855239.1c1a426a2fee +3 -0
- model.safetensors +1 -1
- training_args.bin +2 -2
README.md
CHANGED
@@ -77,7 +77,7 @@ LlamaForCausalLM(
|
|
77 |
|
78 |
# Resource Usage
|
79 |
|
80 |
-
- Max Train VRAM Use: 13.
|
81 |
- Available VRAM: 23.4329 GB
|
82 |
- GPUs:
|
83 |
- 1x NVIDIA GeForce RTX 4090
|
@@ -144,13 +144,12 @@ The following hyperparameters were used during training:
|
|
144 |
<details>
|
145 |
<summary>Expand</summary>
|
146 |
|
147 |
-
- learning_rate: `0.
|
148 |
- train_batch_size: `8`
|
149 |
- eval_batch_size: `2`
|
150 |
- seed: `42`
|
151 |
- optimizer: `Adam with betas=(0.9,0.999) and epsilon=1e-08`
|
152 |
- lr_scheduler_type: `polynomial`
|
153 |
-
- lr_scheduler_warmup_ratio: `0.1`
|
154 |
- num_epochs: `1.0`
|
155 |
- distillation_objective: `DistillationObjective(
|
156 |
logits_loss_component=LossComponent(
|
@@ -164,7 +163,7 @@ The following hyperparameters were used during training:
|
|
164 |
weight=0
|
165 |
)
|
166 |
)`
|
167 |
-
- lr_scheduler: `<torch.optim.lr_scheduler.LambdaLR object at
|
168 |
- student_model_name_or_path: `None`
|
169 |
- student_config_name_or_path: `None`
|
170 |
- student_model_config: `{'num_hidden_layers': 15}`
|
@@ -188,7 +187,7 @@ The following hyperparameters were used during training:
|
|
188 |
- gradient_accumulation_steps: `1`
|
189 |
- weight_decay: `0.0`
|
190 |
- max_grad_norm: `1.0`
|
191 |
-
- warmup_ratio: `0.
|
192 |
- warmup_steps: `0`
|
193 |
- gradient_checkpointing: `True`
|
194 |
|
|
|
77 |
|
78 |
# Resource Usage
|
79 |
|
80 |
+
- Max Train VRAM Use: 13.1279 GB
|
81 |
- Available VRAM: 23.4329 GB
|
82 |
- GPUs:
|
83 |
- 1x NVIDIA GeForce RTX 4090
|
|
|
144 |
<details>
|
145 |
<summary>Expand</summary>
|
146 |
|
147 |
+
- learning_rate: `0.0002`
|
148 |
- train_batch_size: `8`
|
149 |
- eval_batch_size: `2`
|
150 |
- seed: `42`
|
151 |
- optimizer: `Adam with betas=(0.9,0.999) and epsilon=1e-08`
|
152 |
- lr_scheduler_type: `polynomial`
|
|
|
153 |
- num_epochs: `1.0`
|
154 |
- distillation_objective: `DistillationObjective(
|
155 |
logits_loss_component=LossComponent(
|
|
|
163 |
weight=0
|
164 |
)
|
165 |
)`
|
166 |
+
- lr_scheduler: `<torch.optim.lr_scheduler.LambdaLR object at 0x7786a35fef50>`
|
167 |
- student_model_name_or_path: `None`
|
168 |
- student_config_name_or_path: `None`
|
169 |
- student_model_config: `{'num_hidden_layers': 15}`
|
|
|
187 |
- gradient_accumulation_steps: `1`
|
188 |
- weight_decay: `0.0`
|
189 |
- max_grad_norm: `1.0`
|
190 |
+
- warmup_ratio: `0.0`
|
191 |
- warmup_steps: `0`
|
192 |
- gradient_checkpointing: `True`
|
193 |
|
logs/foo/bar.tb/events.out.tfevents.1726905162.1c1a426a2fee
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8fe2f4be43216ad38cd997dbcfb29f77ef6995c0ca354d179e3a3abc94926468
|
3 |
+
size 154
|
logs/learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726983816.1c1a426a2fee
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f8572fe5139111160bc267592477cc2664e0a23b27b9284ee7bbddc4cd22cff2
|
3 |
+
size 1649398
|
logs/learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726951701.1c1a426a2fee
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33b706ca15d34d720b81405bb4a337d85e671f6b39089f3d392c9ba5565c5a0a
|
3 |
+
size 3432060
|
logs/learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726983258.1c1a426a2fee
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:290104f22341cfb98d229d92d0d9103b0dc5674d3db0c2506a561bec232004a4
|
3 |
+
size 529
|
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726822881.1c1a426a2fee
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f668ef730a7b1fef022b707a0d2315f116d13189e7a57fe77fb5840f5caf4fc
|
3 |
+
size 3432093
|
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726854655.1c1a426a2fee
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc7dc9f74471b20cd6e308f41b1380cdbb45d62913496bd4b5bbf1f042a0a203
|
3 |
+
size 529
|
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726919574.1c1a426a2fee
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1852e4e0acda4814f13cd7171d324fc6709dcb6a8464dbbd0ab8681818751923
|
3 |
+
size 3432093
|
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726951144.1c1a426a2fee
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:427e6121d618847ca5b5c3c53df962fbcd9a657f598556e97c0a44615e0a2f9b
|
3 |
+
size 529
|
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726790562.1c1a426a2fee
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:27bf218a8f3f03002c161c78fc3a0a3ca6417cd399e697c6f178517a9661825d
|
3 |
+
size 3432057
|
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726822282.1c1a426a2fee
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:718d5dc6065e5a92884230eb37fd0456f8b184fc106f2d2e857dfbcfae10bbfa
|
3 |
+
size 529
|
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726887093.1c1a426a2fee
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bf337cdcfa2c0b457a4e3ddef76a30738c49f4b347aa8685c442e0e841e11410
|
3 |
+
size 3432057
|
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726918980.1c1a426a2fee
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38b7c230b364e70b9764478f256758483871f7b9276965b436423c3161dc6baf
|
3 |
+
size 529
|
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___1.0___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726855239.1c1a426a2fee
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:568c09d172d759bff69885609dd0ba449dacccd3ade00643d7f09f024a41d8d6
|
3 |
+
size 2591590
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 325669528
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4aa7b6cee0c4925becb583bf3699fb937915e554ad8bf4e2b1d9107494ef1011
|
3 |
size 325669528
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8a3dc7d5a29701a70887ac3cfe75e63edabeb5e9c6b22e4b6e9b16fa95b7e24c
|
3 |
+
size 5624
|