huggingartists

Browse files

Files changed (11) hide show

README.md +3 -3
config.json +1 -1
evaluation.txt +1 -1
flax_model.msgpack +1 -1
optimizer.pt +2 -2
pytorch_model.bin +1 -1
rng_state.pth +2 -2
scheduler.pt +1 -1
tokenizer_config.json +1 -1
trainer_state.json +239 -7
training_args.bin +2 -2

README.md CHANGED Viewed

@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/boris-grebenshikov")
 ```
-[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/17fz2zsy/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Борис Гребенщиков (Boris Grebenshikov)'s lyrics.
-Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/31rj6k29) for full transparency and reproducibility.
-At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/31rj6k29/artifacts) is logged and versioned.
 ## How to use

 dataset = load_dataset("huggingartists/boris-grebenshikov")
 ```
+[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2r073xb9/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 ## Training procedure
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Борис Гребенщиков (Boris Grebenshikov)'s lyrics.
+Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1crr7kfj) for full transparency and reproducibility.
+At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1crr7kfj/artifacts) is logged and versioned.
 ## How to use

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "gpt2",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

 {
+  "_name_or_path": "huggingartists/boris-grebenshikov",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"

evaluation.txt CHANGED Viewed

	@@ -1 +1 @@
1	- {"eval_loss": 1.~~8803279399871826~~, "eval_runtime": 11.~~6498~~, "eval_samples_per_second": 22.~~146~~, "eval_steps_per_second": 2.~~833~~, "epoch": 1.0}


1	+ {"eval_loss": 1.7017050981521606, "eval_runtime": 10.4031, "eval_samples_per_second": 21.916, "eval_steps_per_second": 2.788, "epoch": 2.0}

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:84a8f55a6dc86a57ae673545b08215bced477db9d2d2cd81be98fdbb18904000
 size 497764120

 version https://git-lfs.github.com/spec/v1
+oid sha256:4caf1777217e0c052ac94bcbeca458605d7be5f6d3bc748e776565bc142dc42e
 size 497764120

optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9f36cc26900ca1466b9c88484f52c35f0b2d73669efd66a283ace9615cbba189
-size 995603825

 version https://git-lfs.github.com/spec/v1
+oid sha256:96d9aef799951a3d3c5d3a2ed572f49f197e45f0dcd10dae5417b75b54419805
+size 995604017

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bdc01c8e9ac3f6ea36007a96f792a90996189530c114a4423b1b16d1d892899b
 size 510403817

 version https://git-lfs.github.com/spec/v1
+oid sha256:b6c2602caf4839e464970f374187b18adcfa27e0d6a45c9600bd1c38b30141b9
 size 510403817

rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3de7f451560a3fb6592fc184c9d1f804bb0cdb4ea9155d6284323d66d6063ab
-size 14567

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa2e4cc4de77dfbc81bb3c499c062d1ba2bdbc2ed6f970ac39c58f271b8d2a5f
+size 14503

scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ceb7b588e7deba2b4499d6c6c3ea0e08b8017594086c1e44c612c131e26fdc94
 size 623

 version https://git-lfs.github.com/spec/v1
+oid sha256:7613c90c6af68cbcc9693f8cef8467997ccd45f7eec064cfce1c9281a578f4fe
 size 623

tokenizer_config.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"unk_token": "<\|endoftext\|>", "bos_token": "<\|endoftext\|>", "eos_token": "<\|endoftext\|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "~~gpt2~~", "tokenizer_class": "GPT2Tokenizer"}


1	+ {"unk_token": "<\|endoftext\|>", "bos_token": "<\|endoftext\|>", "eos_token": "<\|endoftext\|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/boris-grebenshikov", "tokenizer_class": "GPT2Tokenizer"}

trainer_state.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
-  "best_metric": 1.8803279399871826,
-  "best_model_checkpoint": "output/boris-grebenshikov/checkpoint-169",
-  "epoch": 1.0,
-  "global_step": 169,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -212,11 +212,243 @@
       "eval_samples_per_second": 22.279,
       "eval_steps_per_second": 2.85,
       "step": 169
     }
   ],
-  "max_steps": 169,
-  "num_train_epochs": 1,
-  "total_flos": 176110829568000.0,
   "trial_name": null,
   "trial_params": null
 }

 {
+  "best_metric": 1.7017050981521606,
+  "best_model_checkpoint": "output/boris-grebenshikov/checkpoint-346",
+  "epoch": 2.0,
+  "global_step": 346,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "eval_samples_per_second": 22.279,
       "eval_steps_per_second": 2.85,
       "step": 169
+    },
+    {
+      "epoch": 0.98,
+      "learning_rate": 1.0177397892100256e-07,
+      "loss": 1.9274,
+      "step": 170
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 1.744057536125183,
+      "eval_runtime": 10.1523,
+      "eval_samples_per_second": 22.458,
+      "eval_steps_per_second": 2.856,
+      "step": 173
+    },
+    {
+      "epoch": 1.01,
+      "learning_rate": 4.523909530405488e-08,
+      "loss": 1.937,
+      "step": 175
+    },
+    {
+      "epoch": 1.04,
+      "learning_rate": 5.534939285797931e-07,
+      "loss": 1.8799,
+      "step": 180
+    },
+    {
+      "epoch": 1.07,
+      "learning_rate": 1.622351211524088e-06,
+      "loss": 1.8935,
+      "step": 185
+    },
+    {
+      "epoch": 1.1,
+      "learning_rate": 3.243005142843674e-06,
+      "loss": 1.873,
+      "step": 190
+    },
+    {
+      "epoch": 1.13,
+      "learning_rate": 5.402103933939619e-06,
+      "loss": 1.8993,
+      "step": 195
+    },
+    {
+      "epoch": 1.16,
+      "learning_rate": 8.081859807874568e-06,
+      "loss": 1.9433,
+      "step": 200
+    },
+    {
+      "epoch": 1.18,
+      "learning_rate": 1.1260195544315306e-05,
+      "loss": 1.8802,
+      "step": 205
+    },
+    {
+      "epoch": 1.21,
+      "learning_rate": 1.491092636313743e-05,
+      "loss": 1.9911,
+      "step": 210
+    },
+    {
+      "epoch": 1.24,
+      "learning_rate": 1.90039756482401e-05,
+      "loss": 1.9398,
+      "step": 215
+    },
+    {
+      "epoch": 1.27,
+      "learning_rate": 2.350562273432663e-05,
+      "loss": 1.7847,
+      "step": 220
+    },
+    {
+      "epoch": 1.3,
+      "learning_rate": 2.8378780715254437e-05,
+      "loss": 1.8382,
+      "step": 225
+    },
+    {
+      "epoch": 1.33,
+      "learning_rate": 3.358330198522479e-05,
+      "loss": 1.9425,
+      "step": 230
+    },
+    {
+      "epoch": 1.36,
+      "learning_rate": 3.907630899560607e-05,
+      "loss": 1.9129,
+      "step": 235
+    },
+    {
+      "epoch": 1.39,
+      "learning_rate": 4.4812547502442855e-05,
+      "loss": 1.8725,
+      "step": 240
+    },
+    {
+      "epoch": 1.42,
+      "learning_rate": 5.0744759394415807e-05,
+      "loss": 1.9258,
+      "step": 245
+    },
+    {
+      "epoch": 1.45,
+      "learning_rate": 5.682407202970108e-05,
+      "loss": 1.8416,
+      "step": 250
+    },
+    {
+      "epoch": 1.47,
+      "learning_rate": 6.300040087417042e-05,
+      "loss": 1.8097,
+      "step": 255
+    },
+    {
+      "epoch": 1.5,
+      "learning_rate": 6.922286212378929e-05,
+      "loss": 1.8724,
+      "step": 260
+    },
+    {
+      "epoch": 1.53,
+      "learning_rate": 7.544019191181583e-05,
+      "loss": 1.8985,
+      "step": 265
+    },
+    {
+      "epoch": 1.56,
+      "learning_rate": 8.160116864715307e-05,
+      "loss": 1.877,
+      "step": 270
+    },
+    {
+      "epoch": 1.59,
+      "learning_rate": 8.765503500441301e-05,
+      "loss": 1.8627,
+      "step": 275
+    },
+    {
+      "epoch": 1.62,
+      "learning_rate": 9.35519160891186e-05,
+      "loss": 1.7856,
+      "step": 280
+    },
+    {
+      "epoch": 1.65,
+      "learning_rate": 9.92432303329815e-05,
+      "loss": 1.8525,
+      "step": 285
+    },
+    {
+      "epoch": 1.68,
+      "learning_rate": 0.00010468208973408737,
+      "loss": 1.9036,
+      "step": 290
+    },
+    {
+      "epoch": 1.71,
+      "learning_rate": 0.00010982368614460176,
+      "loss": 1.8999,
+      "step": 295
+    },
+    {
+      "epoch": 1.73,
+      "learning_rate": 0.00011462566042355846,
+      "loss": 1.866,
+      "step": 300
+    },
+    {
+      "epoch": 1.76,
+      "learning_rate": 0.00011904845141345724,
+      "loss": 1.9433,
+      "step": 305
+    },
+    {
+      "epoch": 1.79,
+      "learning_rate": 0.00012305562186562137,
+      "loss": 1.8415,
+      "step": 310
+    },
+    {
+      "epoch": 1.82,
+      "learning_rate": 0.0001266141586291718,
+      "loss": 1.8489,
+      "step": 315
+    },
+    {
+      "epoch": 1.85,
+      "learning_rate": 0.00012969474463050626,
+      "loss": 1.7954,
+      "step": 320
+    },
+    {
+      "epoch": 1.88,
+      "learning_rate": 0.00013227200040257384,
+      "loss": 1.8456,
+      "step": 325
+    },
+    {
+      "epoch": 1.91,
+      "learning_rate": 0.000134324693174103,
+      "loss": 1.8414,
+      "step": 330
+    },
+    {
+      "epoch": 1.94,
+      "learning_rate": 0.00013583591179619617,
+      "loss": 1.8602,
+      "step": 335
+    },
+    {
+      "epoch": 1.97,
+      "learning_rate": 0.00013679320606515522,
+      "loss": 1.8914,
+      "step": 340
+    },
+    {
+      "epoch": 1.99,
+      "learning_rate": 0.00013718868929372445,
+      "loss": 1.7701,
+      "step": 345
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 1.7017050981521606,
+      "eval_runtime": 10.2794,
+      "eval_samples_per_second": 22.18,
+      "eval_steps_per_second": 2.821,
+      "step": 346
     }
   ],
+  "max_steps": 346,
+  "num_train_epochs": 2,
+  "total_flos": 359537836032000.0,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dcecf49ff1760e04ac3d0e673c15ec03052622419263379f9a66e2ed277e9809
-size 2671

 version https://git-lfs.github.com/spec/v1
+oid sha256:a577c92f63c7982cffa4f5373df3f9b2218b8fba8ae03f5f9b92e96c6b2421a2
+size 2735