lapp0 committed on
Commit
bad0ed5
·
verified ·
1 Parent(s): c3c35c8

Training in progress, step 60000

Browse files
Files changed (16) hide show
  1. README.md +4 -5
  2. logs/foo/bar.tb/events.out.tfevents.1726905162.1c1a426a2fee +3 -0
  3. logs/learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726983816.1c1a426a2fee +3 -0
  4. logs/learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726951701.1c1a426a2fee +3 -0
  5. logs/learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726983258.1c1a426a2fee +3 -0
  6. logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726822881.1c1a426a2fee +3 -0
  7. logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726854655.1c1a426a2fee +3 -0
  8. logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726919574.1c1a426a2fee +3 -0
  9. logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726951144.1c1a426a2fee +3 -0
  10. logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726790562.1c1a426a2fee +2 -2
  11. logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726822282.1c1a426a2fee +3 -0
  12. logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726887093.1c1a426a2fee +3 -0
  13. logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726918980.1c1a426a2fee +3 -0
  14. logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___1.0___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726855239.1c1a426a2fee +3 -0
  15. model.safetensors +1 -1
  16. training_args.bin +2 -2
README.md CHANGED
@@ -77,7 +77,7 @@ LlamaForCausalLM(
77
 
78
  # Resource Usage
79
 
80
- - Max Train VRAM Use: 13.1273 GB
81
  - Available VRAM: 23.4329 GB
82
  - GPUs:
83
  - 1x NVIDIA GeForce RTX 4090
@@ -144,13 +144,12 @@ The following hyperparameters were used during training:
144
  <details>
145
  <summary>Expand</summary>
146
 
147
- - learning_rate: `0.0001`
148
  - train_batch_size: `8`
149
  - eval_batch_size: `2`
150
  - seed: `42`
151
  - optimizer: `Adam with betas=(0.9,0.999) and epsilon=1e-08`
152
  - lr_scheduler_type: `polynomial`
153
- - lr_scheduler_warmup_ratio: `0.1`
154
  - num_epochs: `1.0`
155
  - distillation_objective: `DistillationObjective(
156
  logits_loss_component=LossComponent(
@@ -164,7 +163,7 @@ The following hyperparameters were used during training:
164
  weight=0
165
  )
166
  )`
167
- - lr_scheduler: `<torch.optim.lr_scheduler.LambdaLR object at 0x7520daf28d30>`
168
  - student_model_name_or_path: `None`
169
  - student_config_name_or_path: `None`
170
  - student_model_config: `{'num_hidden_layers': 15}`
@@ -188,7 +187,7 @@ The following hyperparameters were used during training:
188
  - gradient_accumulation_steps: `1`
189
  - weight_decay: `0.0`
190
  - max_grad_norm: `1.0`
191
- - warmup_ratio: `0.1`
192
  - warmup_steps: `0`
193
  - gradient_checkpointing: `True`
194
 
 
77
 
78
  # Resource Usage
79
 
80
+ - Max Train VRAM Use: 13.1279 GB
81
  - Available VRAM: 23.4329 GB
82
  - GPUs:
83
  - 1x NVIDIA GeForce RTX 4090
 
144
  <details>
145
  <summary>Expand</summary>
146
 
147
+ - learning_rate: `0.0002`
148
  - train_batch_size: `8`
149
  - eval_batch_size: `2`
150
  - seed: `42`
151
  - optimizer: `Adam with betas=(0.9,0.999) and epsilon=1e-08`
152
  - lr_scheduler_type: `polynomial`
 
153
  - num_epochs: `1.0`
154
  - distillation_objective: `DistillationObjective(
155
  logits_loss_component=LossComponent(
 
163
  weight=0
164
  )
165
  )`
166
+ - lr_scheduler: `<torch.optim.lr_scheduler.LambdaLR object at 0x7786a35fef50>`
167
  - student_model_name_or_path: `None`
168
  - student_config_name_or_path: `None`
169
  - student_model_config: `{'num_hidden_layers': 15}`
 
187
  - gradient_accumulation_steps: `1`
188
  - weight_decay: `0.0`
189
  - max_grad_norm: `1.0`
190
+ - warmup_ratio: `0.0`
191
  - warmup_steps: `0`
192
  - gradient_checkpointing: `True`
193
 
logs/foo/bar.tb/events.out.tfevents.1726905162.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fe2f4be43216ad38cd997dbcfb29f77ef6995c0ca354d179e3a3abc94926468
3
+ size 154
logs/learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726983816.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8572fe5139111160bc267592477cc2664e0a23b27b9284ee7bbddc4cd22cff2
3
+ size 1649398
logs/learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726951701.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33b706ca15d34d720b81405bb4a337d85e671f6b39089f3d392c9ba5565c5a0a
3
+ size 3432060
logs/learning_rate=0.0002, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726983258.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:290104f22341cfb98d229d92d0d9103b0dc5674d3db0c2506a561bec232004a4
3
+ size 529
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726822881.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f668ef730a7b1fef022b707a0d2315f116d13189e7a57fe77fb5840f5caf4fc
3
+ size 3432093
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726854655.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc7dc9f74471b20cd6e308f41b1380cdbb45d62913496bd4b5bbf1f042a0a203
3
+ size 529
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726919574.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1852e4e0acda4814f13cd7171d324fc6709dcb6a8464dbbd0ab8681818751923
3
+ size 3432093
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8, warmup_ratio=0.1/events.out.tfevents.1726951144.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:427e6121d618847ca5b5c3c53df962fbcd9a657f598556e97c0a44615e0a2f9b
3
+ size 529
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726790562.1c1a426a2fee CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3a8d2f0281c387551085af816b974db1071f275534061a94f1d24a5aaf008ea
3
- size 1924712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27bf218a8f3f03002c161c78fc3a0a3ca6417cd399e697c6f178517a9661825d
3
+ size 3432057
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726822282.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:718d5dc6065e5a92884230eb37fd0456f8b184fc106f2d2e857dfbcfae10bbfa
3
+ size 529
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726887093.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf337cdcfa2c0b457a4e3ddef76a30738c49f4b347aa8685c442e0e841e11410
3
+ size 3432057
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___0.7___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726918980.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38b7c230b364e70b9764478f256758483871f7b9276965b436423c3161dc6baf
3
+ size 529
logs/learning_rate=5e-05, lr_scheduler_kwargs=__power___1.0___lr_end___2e-05_, lr_scheduler_type=polynomial, per_device_train_batch_size=8/events.out.tfevents.1726855239.1c1a426a2fee ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:568c09d172d759bff69885609dd0ba449dacccd3ade00643d7f09f024a41d8d6
3
+ size 2591590
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:057ecb946053f9f9ba522624d78877ebe4555b60c5311c5269e15a26f18f278b
3
  size 325669528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aa7b6cee0c4925becb583bf3699fb937915e554ad8bf4e2b1d9107494ef1011
3
  size 325669528
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7559b884144948c31f0e6529e344248ee71e7f34968a5f61750a8d9e31a6e94b
3
- size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a3dc7d5a29701a70887ac3cfe75e63edabeb5e9c6b22e4b6e9b16fa95b7e24c
3
+ size 5624