huggingartists

Browse files

Files changed (10) hide show

README.md +3 -3
config.json +1 -1
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +1 -1
pytorch_model.bin +1 -1
rng_state.pth +2 -2
scheduler.pt +1 -1
trainer_state.json +213 -7
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/boris-grebenshikov")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2r073xb9/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Борис Гребенщиков (Boris Grebenshikov)'s lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1crr7kfj) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1crr7kfj/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/boris-grebenshikov")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3usdyy9d/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Борис Гребенщиков (Boris Grebenshikov)'s lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1hazzf7o) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1hazzf7o/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -35,7 +35,7 @@
     }
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.10.0",
   "use_cache": true,
   "vocab_size": 50257
 }

     }
   },
   "torch_dtype": "float32",
+  "transformers_version": "4.10.2",
   "use_cache": true,
   "vocab_size": 50257
 }

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 1.~~7017050981521606~~, "eval_runtime": 10.~~4031~~, "eval_samples_per_second": 21.~~916~~, "eval_steps_per_second": 2.~~788~~, "epoch": 2.0}


1	+ {"eval_loss": 1.6859837770462036, "eval_runtime": 11.1348, "eval_samples_per_second": 22.452, "eval_steps_per_second": 2.874, "epoch": 3.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4caf1777217e0c052ac94bcbeca458605d7be5f6d3bc748e776565bc142dc42e
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:2d13a88d644111679b0e771a0604a0cc7571d6c6d0aa4f447d2b3e3cd1d73673
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:96d9aef799951a3d3c5d3a2ed572f49f197e45f0dcd10dae5417b75b54419805
 size 995604017

 version https://git-lfs.github.com/spec/v1
+oid sha256:e6ad4d0daa593acffd671b9972c54cffa4ab3c9e35e5e977831f28162a0e3296
 size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b6c2602caf4839e464970f374187b18adcfa27e0d6a45c9600bd1c38b30141b9
 size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:edcb7068016669cd7d3686ab784b0f9a1369a4fdc7e206c64f18a1eeb8f4b6d1
 size 510403817

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:aa2e4cc4de77dfbc81bb3c499c062d1ba2bdbc2ed6f970ac39c58f271b8d2a5f
-size 14503

 version https://git-lfs.github.com/spec/v1
+oid sha256:21247813c84eaba6dd1c8fa5e87e39109d5255edf8b9566147088529c2be57c7
+size 14567

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7613c90c6af68cbcc9693f8cef8467997ccd45f7eec064cfce1c9281a578f4fe
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:87ba49cc9b309f097ae74c170e865f98bf5e5b8b130e72a509a3e15fdec2e6c1
 size 623

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 1.7017050981521606,
-  "best_model_checkpoint": "output/boris-grebenshikov/checkpoint-346",
-  "epoch": 2.0,
-  "global_step": 346,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -444,11 +444,217 @@
       "eval_samples_per_second": 22.18,
       "eval_steps_per_second": 2.821,
       "step": 346
     }
   ],
-  "max_steps": 346,
-  "num_train_epochs": 2,
-  "total_flos": 359537836032000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 1.6859837770462036,
+  "best_model_checkpoint": "output/boris-grebenshikov/checkpoint-510",
+  "epoch": 3.0,
+  "global_step": 510,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.18,
       "eval_steps_per_second": 2.821,
       "step": 346
+    },
+    {
+      "epoch": 2.06,
+      "learning_rate": 0.00013603195463831566,
+      "loss": 1.8257,
+      "step": 350
+    },
+    {
+      "epoch": 2.09,
+      "learning_rate": 0.00013458123912165538,
+      "loss": 1.7795,
+      "step": 355
+    },
+    {
+      "epoch": 2.12,
+      "learning_rate": 0.00013256759493713883,
+      "loss": 1.6858,
+      "step": 360
+    },
+    {
+      "epoch": 2.15,
+      "learning_rate": 0.0001300082017869573,
+      "loss": 1.8715,
+      "step": 365
+    },
+    {
+      "epoch": 2.18,
+      "learning_rate": 0.00012692489551105156,
+      "loss": 1.7653,
+      "step": 370
+    },
+    {
+      "epoch": 2.21,
+      "learning_rate": 0.0001233439817914244,
+      "loss": 1.7267,
+      "step": 375
+    },
+    {
+      "epoch": 2.24,
+      "learning_rate": 0.0001192960117213372,
+      "loss": 1.7223,
+      "step": 380
+    },
+    {
+      "epoch": 2.26,
+      "learning_rate": 0.00011481552115415387,
+      "loss": 1.7739,
+      "step": 385
+    },
+    {
+      "epoch": 2.29,
+      "learning_rate": 0.00010994073605561706,
+      "loss": 1.7485,
+      "step": 390
+    },
+    {
+      "epoch": 2.32,
+      "learning_rate": 0.00010471324637338657,
+      "loss": 1.7833,
+      "step": 395
+    },
+    {
+      "epoch": 2.35,
+      "learning_rate": 9.917765120627052e-05,
+      "loss": 1.7504,
+      "step": 400
+    },
+    {
+      "epoch": 2.38,
+      "learning_rate": 9.338117830043871e-05,
+      "loss": 1.7411,
+      "step": 405
+    },
+    {
+      "epoch": 2.41,
+      "learning_rate": 8.737328111894491e-05,
+      "loss": 1.7067,
+      "step": 410
+    },
+    {
+      "epoch": 2.44,
+      "learning_rate": 8.120521692221671e-05,
+      "loss": 1.7285,
+      "step": 415
+    },
+    {
+      "epoch": 2.47,
+      "learning_rate": 7.492960945918252e-05,
+      "loss": 1.7692,
+      "step": 420
+    },
+    {
+      "epoch": 2.5,
+      "learning_rate": 6.860000000000001e-05,
+      "loss": 1.7354,
+      "step": 425
+    },
+    {
+      "epoch": 2.53,
+      "learning_rate": 6.227039054081752e-05,
+      "loss": 1.7446,
+      "step": 430
+    },
+    {
+      "epoch": 2.56,
+      "learning_rate": 5.599478307778333e-05,
+      "loss": 1.8284,
+      "step": 435
+    },
+    {
+      "epoch": 2.59,
+      "learning_rate": 4.9826718881055135e-05,
+      "loss": 1.7434,
+      "step": 440
+    },
+    {
+      "epoch": 2.62,
+      "learning_rate": 4.381882169956128e-05,
+      "loss": 1.6965,
+      "step": 445
+    },
+    {
+      "epoch": 2.65,
+      "learning_rate": 3.8022348793729525e-05,
+      "loss": 1.7405,
+      "step": 450
+    },
+    {
+      "epoch": 2.68,
+      "learning_rate": 3.24867536266134e-05,
+      "loss": 1.7292,
+      "step": 455
+    },
+    {
+      "epoch": 2.71,
+      "learning_rate": 2.7259263944382986e-05,
+      "loss": 1.7385,
+      "step": 460
+    },
+    {
+      "epoch": 2.74,
+      "learning_rate": 2.2384478845846205e-05,
+      "loss": 1.6668,
+      "step": 465
+    },
+    {
+      "epoch": 2.76,
+      "learning_rate": 1.7903988278662788e-05,
+      "loss": 1.591,
+      "step": 470
+    },
+    {
+      "epoch": 2.79,
+      "learning_rate": 1.3856018208575617e-05,
+      "loss": 1.7083,
+      "step": 475
+    },
+    {
+      "epoch": 2.82,
+      "learning_rate": 1.0275104488948488e-05,
+      "loss": 1.6955,
+      "step": 480
+    },
+    {
+      "epoch": 2.85,
+      "learning_rate": 7.191798213042723e-06,
+      "loss": 1.7392,
+      "step": 485
+    },
+    {
+      "epoch": 2.88,
+      "learning_rate": 4.6324050628612214e-06,
+      "loss": 1.7917,
+      "step": 490
+    },
+    {
+      "epoch": 2.91,
+      "learning_rate": 2.6187608783446213e-06,
+      "loss": 1.7505,
+      "step": 495
+    },
+    {
+      "epoch": 2.94,
+      "learning_rate": 1.1680453616843376e-06,
+      "loss": 1.7224,
+      "step": 500
+    },
+    {
+      "epoch": 2.97,
+      "learning_rate": 2.926355061606279e-07,
+      "loss": 1.6373,
+      "step": 505
+    },
+    {
+      "epoch": 3.0,
+      "learning_rate": 0.0,
+      "loss": 1.6567,
+      "step": 510
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 1.6859837770462036,
+      "eval_runtime": 10.9838,
+      "eval_samples_per_second": 22.761,
+      "eval_steps_per_second": 2.913,
+      "step": 510
     }
   ],
+  "max_steps": 510,
+  "num_train_epochs": 3,
+  "total_flos": 530422824960000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a577c92f63c7982cffa4f5373df3f9b2218b8fba8ae03f5f9b92e96c6b2421a2
 size 2735

 version https://git-lfs.github.com/spec/v1
+oid sha256:cf593eb68797dde3a11c261e20993644e6e8d1db2915dab364fbd4a5c0431556
 size 2735