Training in progress, step 100, checkpoint

Browse files

Files changed (5) hide show

last-checkpoint/adapter_model.safetensors +1 -1
last-checkpoint/optimizer.pt +1 -1
last-checkpoint/rng_state.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +371 -5

last-checkpoint/adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0958b7c2f0601e62bee2087e2007d80b5b2acfceb5f2a49fe52f88f3273d6630
 size 63592

 version https://git-lfs.github.com/spec/v1
+oid sha256:cd2f671d7188fe94c623790ad2e45a520a6bca830cb2204cefb7c47dcd122800
 size 63592

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20e564c96da04714fdfd654420aee3868eda81ce312b8db914eba996352a1f57
 size 136814

 version https://git-lfs.github.com/spec/v1
+oid sha256:25479eb18f573d534f178ada9b9e969f221e608667a9ca3e432f82ff0d14f87c
 size 136814

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46557ca5ec0a1a09e98baf727fca56ea3045f27e6aa2396d49d26a03ff7dcceb
 size 14244

 version https://git-lfs.github.com/spec/v1
+oid sha256:8d00b899626390a5d0d26e6a5ae73a9418e5abd2a9a1c34b8878959082261e1a
 size 14244

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f1cc9afd56c8282f59607c4f273b7bc247d970043d317c3651db16eb05eaec8
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:33fdeac700456f590554babdb1bb06f3350befc1bacd0a19b3d33f64c1a1c32d
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
-  "best_metric": 10.535416603088379,
-  "best_model_checkpoint": "miner_id_24/checkpoint-50",
-  "epoch": 0.7604562737642585,
   "eval_steps": 25,
-  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -381,6 +381,372 @@
       "eval_samples_per_second": 463.538,
       "eval_steps_per_second": 64.895,
       "step": 50
     }
   ],
   "logging_steps": 1,
@@ -409,7 +775,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 397737984000.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null

 {
+  "best_metric": 10.526249885559082,
+  "best_model_checkpoint": "miner_id_24/checkpoint-100",
+  "epoch": 1.5285171102661597,
   "eval_steps": 25,
+  "global_step": 100,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 463.538,
       "eval_steps_per_second": 64.895,
       "step": 50
+    },
+    {
+      "epoch": 0.7756653992395437,
+      "grad_norm": 0.7378947138786316,
+      "learning_rate": 0.00022388221019307967,
+      "loss": 42.1719,
+      "step": 51
+    },
+    {
+      "epoch": 0.7908745247148289,
+      "grad_norm": 0.8719134330749512,
+      "learning_rate": 0.000220496511454098,
+      "loss": 42.1641,
+      "step": 52
+    },
+    {
+      "epoch": 0.8060836501901141,
+      "grad_norm": 0.7421266436576843,
+      "learning_rate": 0.00021706406894776709,
+      "loss": 42.2005,
+      "step": 53
+    },
+    {
+      "epoch": 0.8212927756653993,
+      "grad_norm": 0.8768603205680847,
+      "learning_rate": 0.0002135871586064791,
+      "loss": 42.1771,
+      "step": 54
+    },
+    {
+      "epoch": 0.8365019011406845,
+      "grad_norm": 0.6931475400924683,
+      "learning_rate": 0.00021006808584768998,
+      "loss": 42.1979,
+      "step": 55
+    },
+    {
+      "epoch": 0.8517110266159695,
+      "grad_norm": 0.5837717652320862,
+      "learning_rate": 0.00020650918404527775,
+      "loss": 42.1406,
+      "step": 56
+    },
+    {
+      "epoch": 0.8669201520912547,
+      "grad_norm": 0.712273895740509,
+      "learning_rate": 0.00020291281298236423,
+      "loss": 42.1328,
+      "step": 57
+    },
+    {
+      "epoch": 0.8821292775665399,
+      "grad_norm": 0.8226441740989685,
+      "learning_rate": 0.00019928135728662522,
+      "loss": 42.1745,
+      "step": 58
+    },
+    {
+      "epoch": 0.8973384030418251,
+      "grad_norm": 0.617861270904541,
+      "learning_rate": 0.0001956172248491277,
+      "loss": 42.1719,
+      "step": 59
+    },
+    {
+      "epoch": 0.9125475285171103,
+      "grad_norm": 0.6812199950218201,
+      "learning_rate": 0.00019192284522774142,
+      "loss": 42.1484,
+      "step": 60
+    },
+    {
+      "epoch": 0.9277566539923955,
+      "grad_norm": 0.6634241342544556,
+      "learning_rate": 0.00018820066803618428,
+      "loss": 42.1615,
+      "step": 61
+    },
+    {
+      "epoch": 0.9429657794676806,
+      "grad_norm": 0.5809543132781982,
+      "learning_rate": 0.00018445316131976934,
+      "loss": 42.1432,
+      "step": 62
+    },
+    {
+      "epoch": 0.9581749049429658,
+      "grad_norm": 0.8210413455963135,
+      "learning_rate": 0.00018068280991893014,
+      "loss": 42.1901,
+      "step": 63
+    },
+    {
+      "epoch": 0.973384030418251,
+      "grad_norm": 1.388875961303711,
+      "learning_rate": 0.00017689211382161034,
+      "loss": 42.2708,
+      "step": 64
+    },
+    {
+      "epoch": 0.9885931558935361,
+      "grad_norm": 0.802690327167511,
+      "learning_rate": 0.00017308358650560928,
+      "loss": 42.1328,
+      "step": 65
+    },
+    {
+      "epoch": 1.0114068441064639,
+      "grad_norm": 0.6896817088127136,
+      "learning_rate": 0.00016925975327198266,
+      "loss": 42.1927,
+      "step": 66
+    },
+    {
+      "epoch": 1.026615969581749,
+      "grad_norm": 0.818332850933075,
+      "learning_rate": 0.00016542314957060405,
+      "loss": 42.1562,
+      "step": 67
+    },
+    {
+      "epoch": 1.0418250950570342,
+      "grad_norm": 0.49196258187294006,
+      "learning_rate": 0.00016157631931899697,
+      "loss": 42.125,
+      "step": 68
+    },
+    {
+      "epoch": 1.0570342205323193,
+      "grad_norm": 0.43189793825149536,
+      "learning_rate": 0.00015772181321555196,
+      "loss": 42.1172,
+      "step": 69
+    },
+    {
+      "epoch": 1.0722433460076046,
+      "grad_norm": 0.3118878901004791,
+      "learning_rate": 0.0001538621870482483,
+      "loss": 42.1068,
+      "step": 70
+    },
+    {
+      "epoch": 1.0874524714828897,
+      "grad_norm": 0.5249186158180237,
+      "learning_rate": 0.00015,
+      "loss": 42.151,
+      "step": 71
+    },
+    {
+      "epoch": 1.102661596958175,
+      "grad_norm": 0.6453397274017334,
+      "learning_rate": 0.00014613781295175172,
+      "loss": 42.1589,
+      "step": 72
+    },
+    {
+      "epoch": 1.11787072243346,
+      "grad_norm": 0.34862664341926575,
+      "learning_rate": 0.000142278186784448,
+      "loss": 42.125,
+      "step": 73
+    },
+    {
+      "epoch": 1.1330798479087452,
+      "grad_norm": 0.37730568647384644,
+      "learning_rate": 0.00013842368068100303,
+      "loss": 42.1198,
+      "step": 74
+    },
+    {
+      "epoch": 1.1482889733840305,
+      "grad_norm": 0.39992350339889526,
+      "learning_rate": 0.00013457685042939592,
+      "loss": 42.138,
+      "step": 75
+    },
+    {
+      "epoch": 1.1482889733840305,
+      "eval_loss": 10.527915954589844,
+      "eval_runtime": 0.106,
+      "eval_samples_per_second": 471.538,
+      "eval_steps_per_second": 66.015,
+      "step": 75
+    },
+    {
+      "epoch": 1.1634980988593155,
+      "grad_norm": 0.9189149141311646,
+      "learning_rate": 0.00013074024672801731,
+      "loss": 42.1536,
+      "step": 76
+    },
+    {
+      "epoch": 1.1787072243346008,
+      "grad_norm": 0.5288547873497009,
+      "learning_rate": 0.0001269164134943907,
+      "loss": 42.1667,
+      "step": 77
+    },
+    {
+      "epoch": 1.193916349809886,
+      "grad_norm": 0.5245852470397949,
+      "learning_rate": 0.00012310788617838966,
+      "loss": 42.1432,
+      "step": 78
+    },
+    {
+      "epoch": 1.209125475285171,
+      "grad_norm": 0.670550525188446,
+      "learning_rate": 0.0001193171900810699,
+      "loss": 42.1536,
+      "step": 79
+    },
+    {
+      "epoch": 1.2243346007604563,
+      "grad_norm": 0.6620810031890869,
+      "learning_rate": 0.00011554683868023067,
+      "loss": 42.1406,
+      "step": 80
+    },
+    {
+      "epoch": 1.2395437262357414,
+      "grad_norm": 0.7172536253929138,
+      "learning_rate": 0.0001117993319638157,
+      "loss": 42.1667,
+      "step": 81
+    },
+    {
+      "epoch": 1.2547528517110267,
+      "grad_norm": 0.6294752359390259,
+      "learning_rate": 0.00010807715477225858,
+      "loss": 42.1797,
+      "step": 82
+    },
+    {
+      "epoch": 1.2699619771863118,
+      "grad_norm": 0.5978144407272339,
+      "learning_rate": 0.00010438277515087233,
+      "loss": 42.1667,
+      "step": 83
+    },
+    {
+      "epoch": 1.285171102661597,
+      "grad_norm": 0.5058898329734802,
+      "learning_rate": 0.00010071864271337478,
+      "loss": 42.125,
+      "step": 84
+    },
+    {
+      "epoch": 1.3003802281368821,
+      "grad_norm": 0.5284583568572998,
+      "learning_rate": 9.708718701763577e-05,
+      "loss": 42.112,
+      "step": 85
+    },
+    {
+      "epoch": 1.3155893536121672,
+      "grad_norm": 0.5570880770683289,
+      "learning_rate": 9.34908159547222e-05,
+      "loss": 42.1302,
+      "step": 86
+    },
+    {
+      "epoch": 1.3307984790874525,
+      "grad_norm": 0.44924110174179077,
+      "learning_rate": 8.993191415231e-05,
+      "loss": 42.1536,
+      "step": 87
+    },
+    {
+      "epoch": 1.3460076045627376,
+      "grad_norm": 0.5590141415596008,
+      "learning_rate": 8.641284139352091e-05,
+      "loss": 42.1406,
+      "step": 88
+    },
+    {
+      "epoch": 1.3612167300380227,
+      "grad_norm": 0.3548166751861572,
+      "learning_rate": 8.293593105223287e-05,
+      "loss": 42.1042,
+      "step": 89
+    },
+    {
+      "epoch": 1.376425855513308,
+      "grad_norm": 0.46111956238746643,
+      "learning_rate": 7.950348854590204e-05,
+      "loss": 42.112,
+      "step": 90
+    },
+    {
+      "epoch": 1.3916349809885933,
+      "grad_norm": 0.2855987846851349,
+      "learning_rate": 7.611778980692035e-05,
+      "loss": 42.1094,
+      "step": 91
+    },
+    {
+      "epoch": 1.4068441064638784,
+      "grad_norm": 0.3709195852279663,
+      "learning_rate": 7.278107977352543e-05,
+      "loss": 42.138,
+      "step": 92
+    },
+    {
+      "epoch": 1.4220532319391634,
+      "grad_norm": 0.8047066330909729,
+      "learning_rate": 6.949557090125994e-05,
+      "loss": 42.1667,
+      "step": 93
+    },
+    {
+      "epoch": 1.4372623574144487,
+      "grad_norm": 0.5303740501403809,
+      "learning_rate": 6.626344169597031e-05,
+      "loss": 42.125,
+      "step": 94
+    },
+    {
+      "epoch": 1.4524714828897338,
+      "grad_norm": 0.41722556948661804,
+      "learning_rate": 6.308683526931545e-05,
+      "loss": 42.125,
+      "step": 95
+    },
+    {
+      "epoch": 1.467680608365019,
+      "grad_norm": 1.1801074743270874,
+      "learning_rate": 5.996785791774478e-05,
+      "loss": 42.2188,
+      "step": 96
+    },
+    {
+      "epoch": 1.4828897338403042,
+      "grad_norm": 0.8621243238449097,
+      "learning_rate": 5.690857772588657e-05,
+      "loss": 42.1849,
+      "step": 97
+    },
+    {
+      "epoch": 1.4980988593155893,
+      "grad_norm": 1.1932792663574219,
+      "learning_rate": 5.391102319527373e-05,
+      "loss": 42.2187,
+      "step": 98
+    },
+    {
+      "epoch": 1.5133079847908744,
+      "grad_norm": 0.38025224208831787,
+      "learning_rate": 5.0977181899315214e-05,
+      "loss": 42.1276,
+      "step": 99
+    },
+    {
+      "epoch": 1.5285171102661597,
+      "grad_norm": 0.39083558320999146,
+      "learning_rate": 4.8108999165406026e-05,
+      "loss": 42.1458,
+      "step": 100
+    },
+    {
+      "epoch": 1.5285171102661597,
+      "eval_loss": 10.526249885559082,
+      "eval_runtime": 0.1068,
+      "eval_samples_per_second": 467.988,
+      "eval_steps_per_second": 65.518,
+      "step": 100
     }
   ],
   "logging_steps": 1,
       "attributes": {}
     }
   },
+  "total_flos": 795475968000.0,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null