Training in progress, step 12000, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 891558696
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12d89b6cfeb0fa2a639fb1d022803e910e636a1653929f3379ba8ecc07936d2d
|
3 |
size 891558696
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1783272762
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:16e8ca890c29387dcb94fe4fee166151a4647fb651f909dbd97850a259675cfe
|
3 |
size 1783272762
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8eee71f4c759651379c503d3028bec932d355f171dd7453ec6f5c469e966f747
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b63e11db1a8e7c7a242100e7b3a9500ec8f1ad290a19c61a227cd5ed6d79dcc2
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "./fine-tuned/checkpoint-
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2537,6 +2537,116 @@
|
|
2537 |
"eval_samples_per_second": 25.457,
|
2538 |
"eval_steps_per_second": 3.185,
|
2539 |
"step": 11500
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2540 |
}
|
2541 |
],
|
2542 |
"logging_steps": 50,
|
@@ -2556,7 +2666,7 @@
|
|
2556 |
"attributes": {}
|
2557 |
}
|
2558 |
},
|
2559 |
-
"total_flos": 5.
|
2560 |
"train_batch_size": 8,
|
2561 |
"trial_name": null,
|
2562 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.10353059321641922,
|
3 |
+
"best_model_checkpoint": "./fine-tuned/checkpoint-12000",
|
4 |
+
"epoch": 2.107666637393519,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 12000,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2537 |
"eval_samples_per_second": 25.457,
|
2538 |
"eval_steps_per_second": 3.185,
|
2539 |
"step": 11500
|
2540 |
+
},
|
2541 |
+
{
|
2542 |
+
"epoch": 2.028629138491262,
|
2543 |
+
"grad_norm": 8534.212890625,
|
2544 |
+
"learning_rate": 1.4783945195854559e-05,
|
2545 |
+
"loss": 0.0969,
|
2546 |
+
"step": 11550
|
2547 |
+
},
|
2548 |
+
{
|
2549 |
+
"epoch": 2.037411082813735,
|
2550 |
+
"grad_norm": 10853.4990234375,
|
2551 |
+
"learning_rate": 1.4718074828737047e-05,
|
2552 |
+
"loss": 0.1016,
|
2553 |
+
"step": 11600
|
2554 |
+
},
|
2555 |
+
{
|
2556 |
+
"epoch": 2.037411082813735,
|
2557 |
+
"eval_loss": 0.10368319600820541,
|
2558 |
+
"eval_runtime": 175.2089,
|
2559 |
+
"eval_samples_per_second": 25.455,
|
2560 |
+
"eval_steps_per_second": 3.185,
|
2561 |
+
"step": 11600
|
2562 |
+
},
|
2563 |
+
{
|
2564 |
+
"epoch": 2.046193027136208,
|
2565 |
+
"grad_norm": 10575.98828125,
|
2566 |
+
"learning_rate": 1.4652204461619534e-05,
|
2567 |
+
"loss": 0.0993,
|
2568 |
+
"step": 11650
|
2569 |
+
},
|
2570 |
+
{
|
2571 |
+
"epoch": 2.054974971458681,
|
2572 |
+
"grad_norm": 17123.625,
|
2573 |
+
"learning_rate": 1.458633409450202e-05,
|
2574 |
+
"loss": 0.1011,
|
2575 |
+
"step": 11700
|
2576 |
+
},
|
2577 |
+
{
|
2578 |
+
"epoch": 2.054974971458681,
|
2579 |
+
"eval_loss": 0.10356248915195465,
|
2580 |
+
"eval_runtime": 175.2585,
|
2581 |
+
"eval_samples_per_second": 25.448,
|
2582 |
+
"eval_steps_per_second": 3.184,
|
2583 |
+
"step": 11700
|
2584 |
+
},
|
2585 |
+
{
|
2586 |
+
"epoch": 2.0637569157811537,
|
2587 |
+
"grad_norm": 13095.1728515625,
|
2588 |
+
"learning_rate": 1.4520463727384507e-05,
|
2589 |
+
"loss": 0.0956,
|
2590 |
+
"step": 11750
|
2591 |
+
},
|
2592 |
+
{
|
2593 |
+
"epoch": 2.0725388601036268,
|
2594 |
+
"grad_norm": 11280.3291015625,
|
2595 |
+
"learning_rate": 1.4454593360266995e-05,
|
2596 |
+
"loss": 0.0969,
|
2597 |
+
"step": 11800
|
2598 |
+
},
|
2599 |
+
{
|
2600 |
+
"epoch": 2.0725388601036268,
|
2601 |
+
"eval_loss": 0.10367120802402496,
|
2602 |
+
"eval_runtime": 175.3325,
|
2603 |
+
"eval_samples_per_second": 25.437,
|
2604 |
+
"eval_steps_per_second": 3.183,
|
2605 |
+
"step": 11800
|
2606 |
+
},
|
2607 |
+
{
|
2608 |
+
"epoch": 2.0813208044261,
|
2609 |
+
"grad_norm": 8861.095703125,
|
2610 |
+
"learning_rate": 1.4388722993149482e-05,
|
2611 |
+
"loss": 0.0971,
|
2612 |
+
"step": 11850
|
2613 |
+
},
|
2614 |
+
{
|
2615 |
+
"epoch": 2.090102748748573,
|
2616 |
+
"grad_norm": 15480.5634765625,
|
2617 |
+
"learning_rate": 1.432285262603197e-05,
|
2618 |
+
"loss": 0.0977,
|
2619 |
+
"step": 11900
|
2620 |
+
},
|
2621 |
+
{
|
2622 |
+
"epoch": 2.090102748748573,
|
2623 |
+
"eval_loss": 0.1037474200129509,
|
2624 |
+
"eval_runtime": 175.4966,
|
2625 |
+
"eval_samples_per_second": 25.414,
|
2626 |
+
"eval_steps_per_second": 3.18,
|
2627 |
+
"step": 11900
|
2628 |
+
},
|
2629 |
+
{
|
2630 |
+
"epoch": 2.098884693071046,
|
2631 |
+
"grad_norm": 10802.611328125,
|
2632 |
+
"learning_rate": 1.4256982258914457e-05,
|
2633 |
+
"loss": 0.0979,
|
2634 |
+
"step": 11950
|
2635 |
+
},
|
2636 |
+
{
|
2637 |
+
"epoch": 2.107666637393519,
|
2638 |
+
"grad_norm": 7810.14111328125,
|
2639 |
+
"learning_rate": 1.4191111891796943e-05,
|
2640 |
+
"loss": 0.0963,
|
2641 |
+
"step": 12000
|
2642 |
+
},
|
2643 |
+
{
|
2644 |
+
"epoch": 2.107666637393519,
|
2645 |
+
"eval_loss": 0.10353059321641922,
|
2646 |
+
"eval_runtime": 175.3863,
|
2647 |
+
"eval_samples_per_second": 25.43,
|
2648 |
+
"eval_steps_per_second": 3.182,
|
2649 |
+
"step": 12000
|
2650 |
}
|
2651 |
],
|
2652 |
"logging_steps": 50,
|
|
|
2666 |
"attributes": {}
|
2667 |
}
|
2668 |
},
|
2669 |
+
"total_flos": 5.845569478852608e+16,
|
2670 |
"train_batch_size": 8,
|
2671 |
"trial_name": null,
|
2672 |
"trial_params": null
|