Training in progress, step 811, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +81 -4
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 792912
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:55e901950eda5c3c11519f5869e78400ddd745c2f3fe32041dc5dd5a35a5e996
|
3 |
size 792912
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 807226
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:73ed21f6575ce7ac1358e5cf5c7c69d83efbe73affe310120a69fabf245a1f71
|
3 |
size 807226
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14960
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95854d5cc00d71cecca852780bc1bcfd6e88e4fb1b955160cbf9a3145c24d1f4
|
3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c29de9e8fa8a45e29d4ef4934d4078cdeb0cb620b4247b9553ca2b1a8cbd29d
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:34653e4e06f0f7c40ebe51d437e12f779eafc5b99b228a15e55333dd8cf2018e
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b961f6d9076ef8d42e30189d58f3020d35ceddece8ef8239ee39ccc91e6823f5
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:70b60170e4ee207b411221d3212e057a7eb914ba6755abafc4c8bac2ff50fb1f
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 203,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -5639,6 +5639,83 @@
|
|
5639 |
"learning_rate": 5.154436320919942e-08,
|
5640 |
"loss": 4.0969,
|
5641 |
"step": 800
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5642 |
}
|
5643 |
],
|
5644 |
"logging_steps": 1,
|
@@ -5653,12 +5730,12 @@
|
|
5653 |
"should_evaluate": false,
|
5654 |
"should_log": false,
|
5655 |
"should_save": true,
|
5656 |
-
"should_training_stop":
|
5657 |
},
|
5658 |
"attributes": {}
|
5659 |
}
|
5660 |
},
|
5661 |
-
"total_flos":
|
5662 |
"train_batch_size": 2,
|
5663 |
"trial_name": null,
|
5664 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 1.000616903146206,
|
5 |
"eval_steps": 203,
|
6 |
+
"global_step": 811,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
5639 |
"learning_rate": 5.154436320919942e-08,
|
5640 |
"loss": 4.0969,
|
5641 |
"step": 800
|
5642 |
+
},
|
5643 |
+
{
|
5644 |
+
"epoch": 0.9882788402220851,
|
5645 |
+
"grad_norm": 657.191162109375,
|
5646 |
+
"learning_rate": 4.25999178087888e-08,
|
5647 |
+
"loss": 4.127,
|
5648 |
+
"step": 801
|
5649 |
+
},
|
5650 |
+
{
|
5651 |
+
"epoch": 0.9895126465144972,
|
5652 |
+
"grad_norm": 618.2482299804688,
|
5653 |
+
"learning_rate": 3.4506864562133815e-08,
|
5654 |
+
"loss": 3.99,
|
5655 |
+
"step": 802
|
5656 |
+
},
|
5657 |
+
{
|
5658 |
+
"epoch": 0.9907464528069093,
|
5659 |
+
"grad_norm": 729.18798828125,
|
5660 |
+
"learning_rate": 2.7265341393983845e-08,
|
5661 |
+
"loss": 4.0454,
|
5662 |
+
"step": 803
|
5663 |
+
},
|
5664 |
+
{
|
5665 |
+
"epoch": 0.9919802590993214,
|
5666 |
+
"grad_norm": 660.1466064453125,
|
5667 |
+
"learning_rate": 2.0875471717013427e-08,
|
5668 |
+
"loss": 4.1317,
|
5669 |
+
"step": 804
|
5670 |
+
},
|
5671 |
+
{
|
5672 |
+
"epoch": 0.9932140653917335,
|
5673 |
+
"grad_norm": 619.5026245117188,
|
5674 |
+
"learning_rate": 1.5337364429696132e-08,
|
5675 |
+
"loss": 3.9811,
|
5676 |
+
"step": 805
|
5677 |
+
},
|
5678 |
+
{
|
5679 |
+
"epoch": 0.9944478716841456,
|
5680 |
+
"grad_norm": 849.2465209960938,
|
5681 |
+
"learning_rate": 1.065111391447271e-08,
|
5682 |
+
"loss": 4.1834,
|
5683 |
+
"step": 806
|
5684 |
+
},
|
5685 |
+
{
|
5686 |
+
"epoch": 0.9956816779765577,
|
5687 |
+
"grad_norm": 727.19580078125,
|
5688 |
+
"learning_rate": 6.816800036124615e-09,
|
5689 |
+
"loss": 4.224,
|
5690 |
+
"step": 807
|
5691 |
+
},
|
5692 |
+
{
|
5693 |
+
"epoch": 0.9969154842689698,
|
5694 |
+
"grad_norm": 646.8365478515625,
|
5695 |
+
"learning_rate": 3.8344881404195345e-09,
|
5696 |
+
"loss": 3.9375,
|
5697 |
+
"step": 808
|
5698 |
+
},
|
5699 |
+
{
|
5700 |
+
"epoch": 0.9981492905613819,
|
5701 |
+
"grad_norm": 956.5570068359375,
|
5702 |
+
"learning_rate": 1.7042290529956095e-09,
|
5703 |
+
"loss": 4.0443,
|
5704 |
+
"step": 809
|
5705 |
+
},
|
5706 |
+
{
|
5707 |
+
"epoch": 0.999383096853794,
|
5708 |
+
"grad_norm": 1171.58642578125,
|
5709 |
+
"learning_rate": 4.260590785121199e-10,
|
5710 |
+
"loss": 4.0195,
|
5711 |
+
"step": 810
|
5712 |
+
},
|
5713 |
+
{
|
5714 |
+
"epoch": 1.000616903146206,
|
5715 |
+
"grad_norm": 823.234130859375,
|
5716 |
+
"learning_rate": 0.0,
|
5717 |
+
"loss": 4.1185,
|
5718 |
+
"step": 811
|
5719 |
}
|
5720 |
],
|
5721 |
"logging_steps": 1,
|
|
|
5730 |
"should_evaluate": false,
|
5731 |
"should_log": false,
|
5732 |
"should_save": true,
|
5733 |
+
"should_training_stop": true
|
5734 |
},
|
5735 |
"attributes": {}
|
5736 |
}
|
5737 |
},
|
5738 |
+
"total_flos": 5387190162948096.0,
|
5739 |
"train_batch_size": 2,
|
5740 |
"trial_name": null,
|
5741 |
"trial_params": null
|