TracyTank commited on
Commit
9e12343
·
verified ·
1 Parent(s): 21fe05d

Training in progress, step 400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b5ec50a7104fda4ac1b053eb567bb5f9f914a269d9377cb9aa27543c508dc71
3
  size 78480072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17224b2b3a91c0e9521d45a24436b841bfe6523957efd7e7cd1dd6c5ebdc3b1b
3
  size 78480072
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4793427131c2566b87d8a91dde8199046e548543ba605414567b32a1eb638e74
3
  size 157104826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e37a68b13eabe1e8db809461fb70508ce003e5a782bb20d5ded4257e3d6e09b
3
  size 157104826
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e872bff49deac1c152ce1064c76aadba1ea767edf8badf5b3ad0550efa67cf88
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c902599e1e01ff9dd003b50caf7eaa257324aa8302b2cf854a158eb29295c69
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd51e4a2fa1ed45d423e33cfdb4f66461311ef195a7e260829c1b7ab9c2bf09c
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de2bfb1b87844b729adff3b7b1db70234c5ded5f2c4da917a4abb3fe4d0e1d04
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c9067a03792836886ea0f8e4e88f4a3bb57bdb9d4a532845d679e011cf46d1af
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d47fce9310ac087a1e60a1910546a7146b9eb3606950c9f78a8a4262e409e9fc
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdbee30d236530da3455941ea1083517e6c7589a0f5586a7fe0002637c7bb89f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3085a74797e29a3dad431860055a9066c135bf30fe41ef3edbddc1e5b4931620
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb1353079ebffc1935b9ccf84a11807834b56e3e2395d00b0b812aabfddd356a
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a6bd280b1fbb3316d4d258b95b1a2fe83754ff0ca1384b0f72c1296f3c08afc
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.9603303869075717e-08,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-350",
4
- "epoch": 1.1717631069991115,
5
  "eval_steps": 25,
6
- "global_step": 350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2577,6 +2577,372 @@
2577
  "eval_samples_per_second": 71.844,
2578
  "eval_steps_per_second": 18.68,
2579
  "step": 350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2580
  }
2581
  ],
2582
  "logging_steps": 1,
@@ -2605,7 +2971,7 @@
2605
  "attributes": {}
2606
  }
2607
  },
2608
- "total_flos": 2.2036567736451072e+18,
2609
  "train_batch_size": 1,
2610
  "trial_name": null,
2611
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.7484028447256605e-08,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-400",
4
+ "epoch": 1.339030892269092,
5
  "eval_steps": 25,
6
+ "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2577
  "eval_samples_per_second": 71.844,
2578
  "eval_steps_per_second": 18.68,
2579
  "step": 350
2580
+ },
2581
+ {
2582
+ "epoch": 1.175108462704511,
2583
+ "grad_norm": 6.101056442275876e-06,
2584
+ "learning_rate": 0.0001466262735086344,
2585
+ "loss": 0.0,
2586
+ "step": 351
2587
+ },
2588
+ {
2589
+ "epoch": 1.1784538184099107,
2590
+ "grad_norm": 5.789248461951502e-06,
2591
+ "learning_rate": 0.00014632641266128428,
2592
+ "loss": 0.0,
2593
+ "step": 352
2594
+ },
2595
+ {
2596
+ "epoch": 1.1817991741153102,
2597
+ "grad_norm": 5.922979653405491e-06,
2598
+ "learning_rate": 0.00014602606930290456,
2599
+ "loss": 0.0,
2600
+ "step": 353
2601
+ },
2602
+ {
2603
+ "epoch": 1.1851445298207097,
2604
+ "grad_norm": 5.770634743385017e-06,
2605
+ "learning_rate": 0.0001457252474228511,
2606
+ "loss": 0.0,
2607
+ "step": 354
2608
+ },
2609
+ {
2610
+ "epoch": 1.1884898855261095,
2611
+ "grad_norm": 5.891377441002987e-06,
2612
+ "learning_rate": 0.00014542395101683561,
2613
+ "loss": 0.0,
2614
+ "step": 355
2615
+ },
2616
+ {
2617
+ "epoch": 1.1918352412315092,
2618
+ "grad_norm": 5.779205366707174e-06,
2619
+ "learning_rate": 0.00014512218408687286,
2620
+ "loss": 0.0,
2621
+ "step": 356
2622
+ },
2623
+ {
2624
+ "epoch": 1.1951805969369087,
2625
+ "grad_norm": 6.2929498199082445e-06,
2626
+ "learning_rate": 0.0001448199506412274,
2627
+ "loss": 0.0,
2628
+ "step": 357
2629
+ },
2630
+ {
2631
+ "epoch": 1.1985259526423082,
2632
+ "grad_norm": 5.800653980259085e-06,
2633
+ "learning_rate": 0.00014451725469436037,
2634
+ "loss": 0.0,
2635
+ "step": 358
2636
+ },
2637
+ {
2638
+ "epoch": 1.201871308347708,
2639
+ "grad_norm": 6.026995379215805e-06,
2640
+ "learning_rate": 0.00014421410026687609,
2641
+ "loss": 0.0,
2642
+ "step": 359
2643
+ },
2644
+ {
2645
+ "epoch": 1.2052166640531075,
2646
+ "grad_norm": 6.040312655386515e-06,
2647
+ "learning_rate": 0.00014391049138546872,
2648
+ "loss": 0.0,
2649
+ "step": 360
2650
+ },
2651
+ {
2652
+ "epoch": 1.2085620197585072,
2653
+ "grad_norm": 6.384302650985774e-06,
2654
+ "learning_rate": 0.00014360643208286887,
2655
+ "loss": 0.0,
2656
+ "step": 361
2657
+ },
2658
+ {
2659
+ "epoch": 1.2119073754639067,
2660
+ "grad_norm": 6.054286586731905e-06,
2661
+ "learning_rate": 0.00014330192639778986,
2662
+ "loss": 0.0,
2663
+ "step": 362
2664
+ },
2665
+ {
2666
+ "epoch": 1.2152527311693064,
2667
+ "grad_norm": 5.83691962674493e-06,
2668
+ "learning_rate": 0.00014299697837487414,
2669
+ "loss": 0.0,
2670
+ "step": 363
2671
+ },
2672
+ {
2673
+ "epoch": 1.218598086874706,
2674
+ "grad_norm": 5.789472197648138e-06,
2675
+ "learning_rate": 0.0001426915920646396,
2676
+ "loss": 0.0,
2677
+ "step": 364
2678
+ },
2679
+ {
2680
+ "epoch": 1.2219434425801057,
2681
+ "grad_norm": 6.030274562363047e-06,
2682
+ "learning_rate": 0.0001423857715234258,
2683
+ "loss": 0.0,
2684
+ "step": 365
2685
+ },
2686
+ {
2687
+ "epoch": 1.2252887982855052,
2688
+ "grad_norm": 6.083088010200299e-06,
2689
+ "learning_rate": 0.00014207952081333992,
2690
+ "loss": 0.0,
2691
+ "step": 366
2692
+ },
2693
+ {
2694
+ "epoch": 1.228634153990905,
2695
+ "grad_norm": 5.530882390303304e-06,
2696
+ "learning_rate": 0.00014177284400220306,
2697
+ "loss": 0.0,
2698
+ "step": 367
2699
+ },
2700
+ {
2701
+ "epoch": 1.2319795096963044,
2702
+ "grad_norm": 5.863201749889413e-06,
2703
+ "learning_rate": 0.00014146574516349595,
2704
+ "loss": 0.0,
2705
+ "step": 368
2706
+ },
2707
+ {
2708
+ "epoch": 1.235324865401704,
2709
+ "grad_norm": 6.111798938945867e-06,
2710
+ "learning_rate": 0.000141158228376305,
2711
+ "loss": 0.0,
2712
+ "step": 369
2713
+ },
2714
+ {
2715
+ "epoch": 1.2386702211071037,
2716
+ "grad_norm": 6.045273494237335e-06,
2717
+ "learning_rate": 0.00014085029772526814,
2718
+ "loss": 0.0,
2719
+ "step": 370
2720
+ },
2721
+ {
2722
+ "epoch": 1.2420155768125032,
2723
+ "grad_norm": 6.002165719110053e-06,
2724
+ "learning_rate": 0.0001405419573005205,
2725
+ "loss": 0.0,
2726
+ "step": 371
2727
+ },
2728
+ {
2729
+ "epoch": 1.245360932517903,
2730
+ "grad_norm": 6.039120307832491e-06,
2731
+ "learning_rate": 0.00014023321119764002,
2732
+ "loss": 0.0,
2733
+ "step": 372
2734
+ },
2735
+ {
2736
+ "epoch": 1.2487062882233024,
2737
+ "grad_norm": 6.003551789035555e-06,
2738
+ "learning_rate": 0.0001399240635175932,
2739
+ "loss": 0.0,
2740
+ "step": 373
2741
+ },
2742
+ {
2743
+ "epoch": 1.2520516439287022,
2744
+ "grad_norm": 5.915948804613436e-06,
2745
+ "learning_rate": 0.00013961451836668043,
2746
+ "loss": 0.0,
2747
+ "step": 374
2748
+ },
2749
+ {
2750
+ "epoch": 1.2553969996341017,
2751
+ "grad_norm": 5.859969860466663e-06,
2752
+ "learning_rate": 0.00013930457985648168,
2753
+ "loss": 0.0,
2754
+ "step": 375
2755
+ },
2756
+ {
2757
+ "epoch": 1.2553969996341017,
2758
+ "eval_loss": 1.7748938319073204e-08,
2759
+ "eval_runtime": 0.6961,
2760
+ "eval_samples_per_second": 71.824,
2761
+ "eval_steps_per_second": 18.674,
2762
+ "step": 375
2763
+ },
2764
+ {
2765
+ "epoch": 1.2587423553395014,
2766
+ "grad_norm": 5.691527803719509e-06,
2767
+ "learning_rate": 0.00013899425210380176,
2768
+ "loss": 0.0,
2769
+ "step": 376
2770
+ },
2771
+ {
2772
+ "epoch": 1.262087711044901,
2773
+ "grad_norm": 5.914226676395629e-06,
2774
+ "learning_rate": 0.00013868353923061563,
2775
+ "loss": 0.0,
2776
+ "step": 377
2777
+ },
2778
+ {
2779
+ "epoch": 1.2654330667503007,
2780
+ "grad_norm": 5.989952114759944e-06,
2781
+ "learning_rate": 0.0001383724453640137,
2782
+ "loss": 0.0,
2783
+ "step": 378
2784
+ },
2785
+ {
2786
+ "epoch": 1.2687784224557002,
2787
+ "grad_norm": 5.64041874895338e-06,
2788
+ "learning_rate": 0.00013806097463614692,
2789
+ "loss": 0.0,
2790
+ "step": 379
2791
+ },
2792
+ {
2793
+ "epoch": 1.2721237781610997,
2794
+ "grad_norm": 6.035153546690708e-06,
2795
+ "learning_rate": 0.00013774913118417195,
2796
+ "loss": 0.0,
2797
+ "step": 380
2798
+ },
2799
+ {
2800
+ "epoch": 1.2754691338664994,
2801
+ "grad_norm": 5.875488113815663e-06,
2802
+ "learning_rate": 0.0001374369191501963,
2803
+ "loss": 0.0,
2804
+ "step": 381
2805
+ },
2806
+ {
2807
+ "epoch": 1.2788144895718991,
2808
+ "grad_norm": 6.396473509084899e-06,
2809
+ "learning_rate": 0.00013712434268122324,
2810
+ "loss": 0.0,
2811
+ "step": 382
2812
+ },
2813
+ {
2814
+ "epoch": 1.2821598452772986,
2815
+ "grad_norm": 5.975215117359767e-06,
2816
+ "learning_rate": 0.00013681140592909652,
2817
+ "loss": 0.0,
2818
+ "step": 383
2819
+ },
2820
+ {
2821
+ "epoch": 1.2855052009826982,
2822
+ "grad_norm": 5.922964191995561e-06,
2823
+ "learning_rate": 0.00013649811305044558,
2824
+ "loss": 0.0,
2825
+ "step": 384
2826
+ },
2827
+ {
2828
+ "epoch": 1.288850556688098,
2829
+ "grad_norm": 5.88319653616054e-06,
2830
+ "learning_rate": 0.00013618446820663015,
2831
+ "loss": 0.0,
2832
+ "step": 385
2833
+ },
2834
+ {
2835
+ "epoch": 1.2921959123934974,
2836
+ "grad_norm": 6.589156328118406e-06,
2837
+ "learning_rate": 0.00013587047556368493,
2838
+ "loss": 0.0,
2839
+ "step": 386
2840
+ },
2841
+ {
2842
+ "epoch": 1.2955412680988971,
2843
+ "grad_norm": 5.7994789131043945e-06,
2844
+ "learning_rate": 0.00013555613929226433,
2845
+ "loss": 0.0,
2846
+ "step": 387
2847
+ },
2848
+ {
2849
+ "epoch": 1.2988866238042966,
2850
+ "grad_norm": 5.76379807171179e-06,
2851
+ "learning_rate": 0.00013524146356758704,
2852
+ "loss": 0.0,
2853
+ "step": 388
2854
+ },
2855
+ {
2856
+ "epoch": 1.3022319795096964,
2857
+ "grad_norm": 5.537129709409783e-06,
2858
+ "learning_rate": 0.00013492645256938068,
2859
+ "loss": 0.0,
2860
+ "step": 389
2861
+ },
2862
+ {
2863
+ "epoch": 1.305577335215096,
2864
+ "grad_norm": 5.805138698633527e-06,
2865
+ "learning_rate": 0.00013461111048182608,
2866
+ "loss": 0.0,
2867
+ "step": 390
2868
+ },
2869
+ {
2870
+ "epoch": 1.3089226909204954,
2871
+ "grad_norm": 5.993603735987563e-06,
2872
+ "learning_rate": 0.00013429544149350187,
2873
+ "loss": 0.0,
2874
+ "step": 391
2875
+ },
2876
+ {
2877
+ "epoch": 1.3122680466258951,
2878
+ "grad_norm": 5.531137048819801e-06,
2879
+ "learning_rate": 0.00013397944979732872,
2880
+ "loss": 0.0,
2881
+ "step": 392
2882
+ },
2883
+ {
2884
+ "epoch": 1.3156134023312949,
2885
+ "grad_norm": 5.714673534384929e-06,
2886
+ "learning_rate": 0.00013366313959051383,
2887
+ "loss": 0.0,
2888
+ "step": 393
2889
+ },
2890
+ {
2891
+ "epoch": 1.3189587580366944,
2892
+ "grad_norm": 5.987948497931939e-06,
2893
+ "learning_rate": 0.000133346515074495,
2894
+ "loss": 0.0,
2895
+ "step": 394
2896
+ },
2897
+ {
2898
+ "epoch": 1.322304113742094,
2899
+ "grad_norm": 6.1729906519758515e-06,
2900
+ "learning_rate": 0.00013302958045488493,
2901
+ "loss": 0.0,
2902
+ "step": 395
2903
+ },
2904
+ {
2905
+ "epoch": 1.3256494694474936,
2906
+ "grad_norm": 5.74833165956079e-06,
2907
+ "learning_rate": 0.00013271233994141516,
2908
+ "loss": 0.0,
2909
+ "step": 396
2910
+ },
2911
+ {
2912
+ "epoch": 1.3289948251528931,
2913
+ "grad_norm": 5.835729098180309e-06,
2914
+ "learning_rate": 0.0001323947977478806,
2915
+ "loss": 0.0,
2916
+ "step": 397
2917
+ },
2918
+ {
2919
+ "epoch": 1.3323401808582929,
2920
+ "grad_norm": 5.932717158430023e-06,
2921
+ "learning_rate": 0.00013207695809208295,
2922
+ "loss": 0.0,
2923
+ "step": 398
2924
+ },
2925
+ {
2926
+ "epoch": 1.3356855365636924,
2927
+ "grad_norm": 6.128618224465754e-06,
2928
+ "learning_rate": 0.00013175882519577526,
2929
+ "loss": 0.0,
2930
+ "step": 399
2931
+ },
2932
+ {
2933
+ "epoch": 1.339030892269092,
2934
+ "grad_norm": 5.671974122378742e-06,
2935
+ "learning_rate": 0.00013144040328460545,
2936
+ "loss": 0.0,
2937
+ "step": 400
2938
+ },
2939
+ {
2940
+ "epoch": 1.339030892269092,
2941
+ "eval_loss": 1.7484028447256605e-08,
2942
+ "eval_runtime": 0.6964,
2943
+ "eval_samples_per_second": 71.794,
2944
+ "eval_steps_per_second": 18.666,
2945
+ "step": 400
2946
  }
2947
  ],
2948
  "logging_steps": 1,
 
2971
  "attributes": {}
2972
  }
2973
  },
2974
+ "total_flos": 2.518464884165837e+18,
2975
  "train_batch_size": 1,
2976
  "trial_name": null,
2977
  "trial_params": null