Training in progress, step 400, checkpoint
Browse files- last-checkpoint/adapter_model.safetensors +1 -1
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +371 -5
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 78480072
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:17224b2b3a91c0e9521d45a24436b841bfe6523957efd7e7cd1dd6c5ebdc3b1b
|
3 |
size 78480072
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 157104826
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e37a68b13eabe1e8db809461fb70508ce003e5a782bb20d5ded4257e3d6e09b
|
3 |
size 157104826
|
last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c902599e1e01ff9dd003b50caf7eaa257324aa8302b2cf854a158eb29295c69
|
3 |
size 15024
|
last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de2bfb1b87844b729adff3b7b1db70234c5ded5f2c4da917a4abb3fe4d0e1d04
|
3 |
size 15024
|
last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d47fce9310ac087a1e60a1910546a7146b9eb3606950c9f78a8a4262e409e9fc
|
3 |
size 15024
|
last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 15024
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3085a74797e29a3dad431860055a9066c135bf30fe41ef3edbddc1e5b4931620
|
3 |
size 15024
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a6bd280b1fbb3316d4d258b95b1a2fe83754ff0ca1384b0f72c1296f3c08afc
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 1.
|
5 |
"eval_steps": 25,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -2577,6 +2577,372 @@
|
|
2577 |
"eval_samples_per_second": 71.844,
|
2578 |
"eval_steps_per_second": 18.68,
|
2579 |
"step": 350
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2580 |
}
|
2581 |
],
|
2582 |
"logging_steps": 1,
|
@@ -2605,7 +2971,7 @@
|
|
2605 |
"attributes": {}
|
2606 |
}
|
2607 |
},
|
2608 |
-
"total_flos": 2.
|
2609 |
"train_batch_size": 1,
|
2610 |
"trial_name": null,
|
2611 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.7484028447256605e-08,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-400",
|
4 |
+
"epoch": 1.339030892269092,
|
5 |
"eval_steps": 25,
|
6 |
+
"global_step": 400,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
2577 |
"eval_samples_per_second": 71.844,
|
2578 |
"eval_steps_per_second": 18.68,
|
2579 |
"step": 350
|
2580 |
+
},
|
2581 |
+
{
|
2582 |
+
"epoch": 1.175108462704511,
|
2583 |
+
"grad_norm": 6.101056442275876e-06,
|
2584 |
+
"learning_rate": 0.0001466262735086344,
|
2585 |
+
"loss": 0.0,
|
2586 |
+
"step": 351
|
2587 |
+
},
|
2588 |
+
{
|
2589 |
+
"epoch": 1.1784538184099107,
|
2590 |
+
"grad_norm": 5.789248461951502e-06,
|
2591 |
+
"learning_rate": 0.00014632641266128428,
|
2592 |
+
"loss": 0.0,
|
2593 |
+
"step": 352
|
2594 |
+
},
|
2595 |
+
{
|
2596 |
+
"epoch": 1.1817991741153102,
|
2597 |
+
"grad_norm": 5.922979653405491e-06,
|
2598 |
+
"learning_rate": 0.00014602606930290456,
|
2599 |
+
"loss": 0.0,
|
2600 |
+
"step": 353
|
2601 |
+
},
|
2602 |
+
{
|
2603 |
+
"epoch": 1.1851445298207097,
|
2604 |
+
"grad_norm": 5.770634743385017e-06,
|
2605 |
+
"learning_rate": 0.0001457252474228511,
|
2606 |
+
"loss": 0.0,
|
2607 |
+
"step": 354
|
2608 |
+
},
|
2609 |
+
{
|
2610 |
+
"epoch": 1.1884898855261095,
|
2611 |
+
"grad_norm": 5.891377441002987e-06,
|
2612 |
+
"learning_rate": 0.00014542395101683561,
|
2613 |
+
"loss": 0.0,
|
2614 |
+
"step": 355
|
2615 |
+
},
|
2616 |
+
{
|
2617 |
+
"epoch": 1.1918352412315092,
|
2618 |
+
"grad_norm": 5.779205366707174e-06,
|
2619 |
+
"learning_rate": 0.00014512218408687286,
|
2620 |
+
"loss": 0.0,
|
2621 |
+
"step": 356
|
2622 |
+
},
|
2623 |
+
{
|
2624 |
+
"epoch": 1.1951805969369087,
|
2625 |
+
"grad_norm": 6.2929498199082445e-06,
|
2626 |
+
"learning_rate": 0.0001448199506412274,
|
2627 |
+
"loss": 0.0,
|
2628 |
+
"step": 357
|
2629 |
+
},
|
2630 |
+
{
|
2631 |
+
"epoch": 1.1985259526423082,
|
2632 |
+
"grad_norm": 5.800653980259085e-06,
|
2633 |
+
"learning_rate": 0.00014451725469436037,
|
2634 |
+
"loss": 0.0,
|
2635 |
+
"step": 358
|
2636 |
+
},
|
2637 |
+
{
|
2638 |
+
"epoch": 1.201871308347708,
|
2639 |
+
"grad_norm": 6.026995379215805e-06,
|
2640 |
+
"learning_rate": 0.00014421410026687609,
|
2641 |
+
"loss": 0.0,
|
2642 |
+
"step": 359
|
2643 |
+
},
|
2644 |
+
{
|
2645 |
+
"epoch": 1.2052166640531075,
|
2646 |
+
"grad_norm": 6.040312655386515e-06,
|
2647 |
+
"learning_rate": 0.00014391049138546872,
|
2648 |
+
"loss": 0.0,
|
2649 |
+
"step": 360
|
2650 |
+
},
|
2651 |
+
{
|
2652 |
+
"epoch": 1.2085620197585072,
|
2653 |
+
"grad_norm": 6.384302650985774e-06,
|
2654 |
+
"learning_rate": 0.00014360643208286887,
|
2655 |
+
"loss": 0.0,
|
2656 |
+
"step": 361
|
2657 |
+
},
|
2658 |
+
{
|
2659 |
+
"epoch": 1.2119073754639067,
|
2660 |
+
"grad_norm": 6.054286586731905e-06,
|
2661 |
+
"learning_rate": 0.00014330192639778986,
|
2662 |
+
"loss": 0.0,
|
2663 |
+
"step": 362
|
2664 |
+
},
|
2665 |
+
{
|
2666 |
+
"epoch": 1.2152527311693064,
|
2667 |
+
"grad_norm": 5.83691962674493e-06,
|
2668 |
+
"learning_rate": 0.00014299697837487414,
|
2669 |
+
"loss": 0.0,
|
2670 |
+
"step": 363
|
2671 |
+
},
|
2672 |
+
{
|
2673 |
+
"epoch": 1.218598086874706,
|
2674 |
+
"grad_norm": 5.789472197648138e-06,
|
2675 |
+
"learning_rate": 0.0001426915920646396,
|
2676 |
+
"loss": 0.0,
|
2677 |
+
"step": 364
|
2678 |
+
},
|
2679 |
+
{
|
2680 |
+
"epoch": 1.2219434425801057,
|
2681 |
+
"grad_norm": 6.030274562363047e-06,
|
2682 |
+
"learning_rate": 0.0001423857715234258,
|
2683 |
+
"loss": 0.0,
|
2684 |
+
"step": 365
|
2685 |
+
},
|
2686 |
+
{
|
2687 |
+
"epoch": 1.2252887982855052,
|
2688 |
+
"grad_norm": 6.083088010200299e-06,
|
2689 |
+
"learning_rate": 0.00014207952081333992,
|
2690 |
+
"loss": 0.0,
|
2691 |
+
"step": 366
|
2692 |
+
},
|
2693 |
+
{
|
2694 |
+
"epoch": 1.228634153990905,
|
2695 |
+
"grad_norm": 5.530882390303304e-06,
|
2696 |
+
"learning_rate": 0.00014177284400220306,
|
2697 |
+
"loss": 0.0,
|
2698 |
+
"step": 367
|
2699 |
+
},
|
2700 |
+
{
|
2701 |
+
"epoch": 1.2319795096963044,
|
2702 |
+
"grad_norm": 5.863201749889413e-06,
|
2703 |
+
"learning_rate": 0.00014146574516349595,
|
2704 |
+
"loss": 0.0,
|
2705 |
+
"step": 368
|
2706 |
+
},
|
2707 |
+
{
|
2708 |
+
"epoch": 1.235324865401704,
|
2709 |
+
"grad_norm": 6.111798938945867e-06,
|
2710 |
+
"learning_rate": 0.000141158228376305,
|
2711 |
+
"loss": 0.0,
|
2712 |
+
"step": 369
|
2713 |
+
},
|
2714 |
+
{
|
2715 |
+
"epoch": 1.2386702211071037,
|
2716 |
+
"grad_norm": 6.045273494237335e-06,
|
2717 |
+
"learning_rate": 0.00014085029772526814,
|
2718 |
+
"loss": 0.0,
|
2719 |
+
"step": 370
|
2720 |
+
},
|
2721 |
+
{
|
2722 |
+
"epoch": 1.2420155768125032,
|
2723 |
+
"grad_norm": 6.002165719110053e-06,
|
2724 |
+
"learning_rate": 0.0001405419573005205,
|
2725 |
+
"loss": 0.0,
|
2726 |
+
"step": 371
|
2727 |
+
},
|
2728 |
+
{
|
2729 |
+
"epoch": 1.245360932517903,
|
2730 |
+
"grad_norm": 6.039120307832491e-06,
|
2731 |
+
"learning_rate": 0.00014023321119764002,
|
2732 |
+
"loss": 0.0,
|
2733 |
+
"step": 372
|
2734 |
+
},
|
2735 |
+
{
|
2736 |
+
"epoch": 1.2487062882233024,
|
2737 |
+
"grad_norm": 6.003551789035555e-06,
|
2738 |
+
"learning_rate": 0.0001399240635175932,
|
2739 |
+
"loss": 0.0,
|
2740 |
+
"step": 373
|
2741 |
+
},
|
2742 |
+
{
|
2743 |
+
"epoch": 1.2520516439287022,
|
2744 |
+
"grad_norm": 5.915948804613436e-06,
|
2745 |
+
"learning_rate": 0.00013961451836668043,
|
2746 |
+
"loss": 0.0,
|
2747 |
+
"step": 374
|
2748 |
+
},
|
2749 |
+
{
|
2750 |
+
"epoch": 1.2553969996341017,
|
2751 |
+
"grad_norm": 5.859969860466663e-06,
|
2752 |
+
"learning_rate": 0.00013930457985648168,
|
2753 |
+
"loss": 0.0,
|
2754 |
+
"step": 375
|
2755 |
+
},
|
2756 |
+
{
|
2757 |
+
"epoch": 1.2553969996341017,
|
2758 |
+
"eval_loss": 1.7748938319073204e-08,
|
2759 |
+
"eval_runtime": 0.6961,
|
2760 |
+
"eval_samples_per_second": 71.824,
|
2761 |
+
"eval_steps_per_second": 18.674,
|
2762 |
+
"step": 375
|
2763 |
+
},
|
2764 |
+
{
|
2765 |
+
"epoch": 1.2587423553395014,
|
2766 |
+
"grad_norm": 5.691527803719509e-06,
|
2767 |
+
"learning_rate": 0.00013899425210380176,
|
2768 |
+
"loss": 0.0,
|
2769 |
+
"step": 376
|
2770 |
+
},
|
2771 |
+
{
|
2772 |
+
"epoch": 1.262087711044901,
|
2773 |
+
"grad_norm": 5.914226676395629e-06,
|
2774 |
+
"learning_rate": 0.00013868353923061563,
|
2775 |
+
"loss": 0.0,
|
2776 |
+
"step": 377
|
2777 |
+
},
|
2778 |
+
{
|
2779 |
+
"epoch": 1.2654330667503007,
|
2780 |
+
"grad_norm": 5.989952114759944e-06,
|
2781 |
+
"learning_rate": 0.0001383724453640137,
|
2782 |
+
"loss": 0.0,
|
2783 |
+
"step": 378
|
2784 |
+
},
|
2785 |
+
{
|
2786 |
+
"epoch": 1.2687784224557002,
|
2787 |
+
"grad_norm": 5.64041874895338e-06,
|
2788 |
+
"learning_rate": 0.00013806097463614692,
|
2789 |
+
"loss": 0.0,
|
2790 |
+
"step": 379
|
2791 |
+
},
|
2792 |
+
{
|
2793 |
+
"epoch": 1.2721237781610997,
|
2794 |
+
"grad_norm": 6.035153546690708e-06,
|
2795 |
+
"learning_rate": 0.00013774913118417195,
|
2796 |
+
"loss": 0.0,
|
2797 |
+
"step": 380
|
2798 |
+
},
|
2799 |
+
{
|
2800 |
+
"epoch": 1.2754691338664994,
|
2801 |
+
"grad_norm": 5.875488113815663e-06,
|
2802 |
+
"learning_rate": 0.0001374369191501963,
|
2803 |
+
"loss": 0.0,
|
2804 |
+
"step": 381
|
2805 |
+
},
|
2806 |
+
{
|
2807 |
+
"epoch": 1.2788144895718991,
|
2808 |
+
"grad_norm": 6.396473509084899e-06,
|
2809 |
+
"learning_rate": 0.00013712434268122324,
|
2810 |
+
"loss": 0.0,
|
2811 |
+
"step": 382
|
2812 |
+
},
|
2813 |
+
{
|
2814 |
+
"epoch": 1.2821598452772986,
|
2815 |
+
"grad_norm": 5.975215117359767e-06,
|
2816 |
+
"learning_rate": 0.00013681140592909652,
|
2817 |
+
"loss": 0.0,
|
2818 |
+
"step": 383
|
2819 |
+
},
|
2820 |
+
{
|
2821 |
+
"epoch": 1.2855052009826982,
|
2822 |
+
"grad_norm": 5.922964191995561e-06,
|
2823 |
+
"learning_rate": 0.00013649811305044558,
|
2824 |
+
"loss": 0.0,
|
2825 |
+
"step": 384
|
2826 |
+
},
|
2827 |
+
{
|
2828 |
+
"epoch": 1.288850556688098,
|
2829 |
+
"grad_norm": 5.88319653616054e-06,
|
2830 |
+
"learning_rate": 0.00013618446820663015,
|
2831 |
+
"loss": 0.0,
|
2832 |
+
"step": 385
|
2833 |
+
},
|
2834 |
+
{
|
2835 |
+
"epoch": 1.2921959123934974,
|
2836 |
+
"grad_norm": 6.589156328118406e-06,
|
2837 |
+
"learning_rate": 0.00013587047556368493,
|
2838 |
+
"loss": 0.0,
|
2839 |
+
"step": 386
|
2840 |
+
},
|
2841 |
+
{
|
2842 |
+
"epoch": 1.2955412680988971,
|
2843 |
+
"grad_norm": 5.7994789131043945e-06,
|
2844 |
+
"learning_rate": 0.00013555613929226433,
|
2845 |
+
"loss": 0.0,
|
2846 |
+
"step": 387
|
2847 |
+
},
|
2848 |
+
{
|
2849 |
+
"epoch": 1.2988866238042966,
|
2850 |
+
"grad_norm": 5.76379807171179e-06,
|
2851 |
+
"learning_rate": 0.00013524146356758704,
|
2852 |
+
"loss": 0.0,
|
2853 |
+
"step": 388
|
2854 |
+
},
|
2855 |
+
{
|
2856 |
+
"epoch": 1.3022319795096964,
|
2857 |
+
"grad_norm": 5.537129709409783e-06,
|
2858 |
+
"learning_rate": 0.00013492645256938068,
|
2859 |
+
"loss": 0.0,
|
2860 |
+
"step": 389
|
2861 |
+
},
|
2862 |
+
{
|
2863 |
+
"epoch": 1.305577335215096,
|
2864 |
+
"grad_norm": 5.805138698633527e-06,
|
2865 |
+
"learning_rate": 0.00013461111048182608,
|
2866 |
+
"loss": 0.0,
|
2867 |
+
"step": 390
|
2868 |
+
},
|
2869 |
+
{
|
2870 |
+
"epoch": 1.3089226909204954,
|
2871 |
+
"grad_norm": 5.993603735987563e-06,
|
2872 |
+
"learning_rate": 0.00013429544149350187,
|
2873 |
+
"loss": 0.0,
|
2874 |
+
"step": 391
|
2875 |
+
},
|
2876 |
+
{
|
2877 |
+
"epoch": 1.3122680466258951,
|
2878 |
+
"grad_norm": 5.531137048819801e-06,
|
2879 |
+
"learning_rate": 0.00013397944979732872,
|
2880 |
+
"loss": 0.0,
|
2881 |
+
"step": 392
|
2882 |
+
},
|
2883 |
+
{
|
2884 |
+
"epoch": 1.3156134023312949,
|
2885 |
+
"grad_norm": 5.714673534384929e-06,
|
2886 |
+
"learning_rate": 0.00013366313959051383,
|
2887 |
+
"loss": 0.0,
|
2888 |
+
"step": 393
|
2889 |
+
},
|
2890 |
+
{
|
2891 |
+
"epoch": 1.3189587580366944,
|
2892 |
+
"grad_norm": 5.987948497931939e-06,
|
2893 |
+
"learning_rate": 0.000133346515074495,
|
2894 |
+
"loss": 0.0,
|
2895 |
+
"step": 394
|
2896 |
+
},
|
2897 |
+
{
|
2898 |
+
"epoch": 1.322304113742094,
|
2899 |
+
"grad_norm": 6.1729906519758515e-06,
|
2900 |
+
"learning_rate": 0.00013302958045488493,
|
2901 |
+
"loss": 0.0,
|
2902 |
+
"step": 395
|
2903 |
+
},
|
2904 |
+
{
|
2905 |
+
"epoch": 1.3256494694474936,
|
2906 |
+
"grad_norm": 5.74833165956079e-06,
|
2907 |
+
"learning_rate": 0.00013271233994141516,
|
2908 |
+
"loss": 0.0,
|
2909 |
+
"step": 396
|
2910 |
+
},
|
2911 |
+
{
|
2912 |
+
"epoch": 1.3289948251528931,
|
2913 |
+
"grad_norm": 5.835729098180309e-06,
|
2914 |
+
"learning_rate": 0.0001323947977478806,
|
2915 |
+
"loss": 0.0,
|
2916 |
+
"step": 397
|
2917 |
+
},
|
2918 |
+
{
|
2919 |
+
"epoch": 1.3323401808582929,
|
2920 |
+
"grad_norm": 5.932717158430023e-06,
|
2921 |
+
"learning_rate": 0.00013207695809208295,
|
2922 |
+
"loss": 0.0,
|
2923 |
+
"step": 398
|
2924 |
+
},
|
2925 |
+
{
|
2926 |
+
"epoch": 1.3356855365636924,
|
2927 |
+
"grad_norm": 6.128618224465754e-06,
|
2928 |
+
"learning_rate": 0.00013175882519577526,
|
2929 |
+
"loss": 0.0,
|
2930 |
+
"step": 399
|
2931 |
+
},
|
2932 |
+
{
|
2933 |
+
"epoch": 1.339030892269092,
|
2934 |
+
"grad_norm": 5.671974122378742e-06,
|
2935 |
+
"learning_rate": 0.00013144040328460545,
|
2936 |
+
"loss": 0.0,
|
2937 |
+
"step": 400
|
2938 |
+
},
|
2939 |
+
{
|
2940 |
+
"epoch": 1.339030892269092,
|
2941 |
+
"eval_loss": 1.7484028447256605e-08,
|
2942 |
+
"eval_runtime": 0.6964,
|
2943 |
+
"eval_samples_per_second": 71.794,
|
2944 |
+
"eval_steps_per_second": 18.666,
|
2945 |
+
"step": 400
|
2946 |
}
|
2947 |
],
|
2948 |
"logging_steps": 1,
|
|
|
2971 |
"attributes": {}
|
2972 |
}
|
2973 |
},
|
2974 |
+
"total_flos": 2.518464884165837e+18,
|
2975 |
"train_batch_size": 1,
|
2976 |
"trial_name": null,
|
2977 |
"trial_params": null
|