AlekseyKorshuk committed
Commit e03102c · 1 Parent(s): fef87f9

huggingartists

README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
 dataset = load_dataset("huggingartists/boris-grebenshikov")
 ```
 
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2r073xb9/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3usdyy9d/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
 
 ## Training procedure
 
 The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Борис Гребенщиков (Boris Grebenshikov)'s lyrics.
 
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1crr7kfj) for full transparency and reproducibility.
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1hazzf7o) for full transparency and reproducibility.
 
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1crr7kfj/artifacts) is logged and versioned.
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1hazzf7o/artifacts) is logged and versioned.
 
 ## How to use
 
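The README hunk ends at the "How to use" heading, so the usage snippet itself is not visible in this diff. As a minimal sketch (not part of this commit), assuming the fine-tuned checkpoint is published under the same repo id as the dataset, `huggingartists/boris-grebenshikov`:

```python
# Sketch only: assumes the fine-tuned GPT-2 checkpoint lives at
# "huggingartists/boris-grebenshikov" on the Hugging Face Hub.
from transformers import pipeline

generator = pipeline("text-generation", model="huggingartists/boris-grebenshikov")

# Generate a short lyric continuation from a seed string of your choice.
outputs = generator("I am", num_return_sequences=1, max_length=50)
print(outputs[0]["generated_text"])
```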
config.json CHANGED
@@ -35,7 +35,7 @@
   }
  },
  "torch_dtype": "float32",
- "transformers_version": "4.10.0",
+ "transformers_version": "4.10.2",
  "use_cache": true,
  "vocab_size": 50257
 }
evaluation.txt CHANGED
@@ -1 +1 @@
- {"eval_loss": 1.7017050981521606, "eval_runtime": 10.4031, "eval_samples_per_second": 21.916, "eval_steps_per_second": 2.788, "epoch": 2.0}
+ {"eval_loss": 1.6859837770462036, "eval_runtime": 11.1348, "eval_samples_per_second": 22.452, "eval_steps_per_second": 2.874, "epoch": 3.0}
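For quick reference, a small illustration (not part of the repo) of the change recorded in evaluation.txt; both values are copied from the lines above:

```python
# Illustration only: eval_loss from the 2-epoch run vs. the 3-epoch run,
# as recorded in the old and new evaluation.txt lines of this diff.
old_eval_loss = 1.7017050981521606
new_eval_loss = 1.6859837770462036

print(f"eval_loss improved by {old_eval_loss - new_eval_loss:.4f}")  # ~0.0157
```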
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:4caf1777217e0c052ac94bcbeca458605d7be5f6d3bc748e776565bc142dc42e
+ oid sha256:2d13a88d644111679b0e771a0604a0cc7571d6c6d0aa4f447d2b3e3cd1d73673
 size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:96d9aef799951a3d3c5d3a2ed572f49f197e45f0dcd10dae5417b75b54419805
+ oid sha256:e6ad4d0daa593acffd671b9972c54cffa4ab3c9e35e5e977831f28162a0e3296
 size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:b6c2602caf4839e464970f374187b18adcfa27e0d6a45c9600bd1c38b30141b9
+ oid sha256:edcb7068016669cd7d3686ab784b0f9a1369a4fdc7e206c64f18a1eeb8f4b6d1
 size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:aa2e4cc4de77dfbc81bb3c499c062d1ba2bdbc2ed6f970ac39c58f271b8d2a5f
- size 14503
+ oid sha256:21247813c84eaba6dd1c8fa5e87e39109d5255edf8b9566147088529c2be57c7
+ size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:7613c90c6af68cbcc9693f8cef8467997ccd45f7eec064cfce1c9281a578f4fe
+ oid sha256:87ba49cc9b309f097ae74c170e865f98bf5e5b8b130e72a509a3e15fdec2e6c1
 size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
 {
- "best_metric": 1.7017050981521606,
- "best_model_checkpoint": "output/boris-grebenshikov/checkpoint-346",
- "epoch": 2.0,
- "global_step": 346,
+ "best_metric": 1.6859837770462036,
+ "best_model_checkpoint": "output/boris-grebenshikov/checkpoint-510",
+ "epoch": 3.0,
+ "global_step": 510,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
@@ -444,11 +444,217 @@
 "eval_samples_per_second": 22.18,
 "eval_steps_per_second": 2.821,
 "step": 346
+ },
+ {
+ "epoch": 2.06,
+ "learning_rate": 0.00013603195463831566,
+ "loss": 1.8257,
+ "step": 350
+ },
+ {
+ "epoch": 2.09,
+ "learning_rate": 0.00013458123912165538,
+ "loss": 1.7795,
+ "step": 355
+ },
+ {
+ "epoch": 2.12,
+ "learning_rate": 0.00013256759493713883,
+ "loss": 1.6858,
+ "step": 360
+ },
+ {
+ "epoch": 2.15,
+ "learning_rate": 0.0001300082017869573,
+ "loss": 1.8715,
+ "step": 365
+ },
+ {
+ "epoch": 2.18,
+ "learning_rate": 0.00012692489551105156,
+ "loss": 1.7653,
+ "step": 370
+ },
+ {
+ "epoch": 2.21,
+ "learning_rate": 0.0001233439817914244,
+ "loss": 1.7267,
+ "step": 375
+ },
+ {
+ "epoch": 2.24,
+ "learning_rate": 0.0001192960117213372,
+ "loss": 1.7223,
+ "step": 380
+ },
+ {
+ "epoch": 2.26,
+ "learning_rate": 0.00011481552115415387,
+ "loss": 1.7739,
+ "step": 385
+ },
+ {
+ "epoch": 2.29,
+ "learning_rate": 0.00010994073605561706,
+ "loss": 1.7485,
+ "step": 390
+ },
+ {
+ "epoch": 2.32,
+ "learning_rate": 0.00010471324637338657,
+ "loss": 1.7833,
+ "step": 395
+ },
+ {
+ "epoch": 2.35,
+ "learning_rate": 9.917765120627052e-05,
+ "loss": 1.7504,
+ "step": 400
+ },
+ {
+ "epoch": 2.38,
+ "learning_rate": 9.338117830043871e-05,
+ "loss": 1.7411,
+ "step": 405
+ },
+ {
+ "epoch": 2.41,
+ "learning_rate": 8.737328111894491e-05,
+ "loss": 1.7067,
+ "step": 410
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 8.120521692221671e-05,
+ "loss": 1.7285,
+ "step": 415
+ },
+ {
+ "epoch": 2.47,
+ "learning_rate": 7.492960945918252e-05,
+ "loss": 1.7692,
+ "step": 420
+ },
+ {
+ "epoch": 2.5,
+ "learning_rate": 6.860000000000001e-05,
+ "loss": 1.7354,
+ "step": 425
+ },
+ {
+ "epoch": 2.53,
+ "learning_rate": 6.227039054081752e-05,
+ "loss": 1.7446,
+ "step": 430
+ },
+ {
+ "epoch": 2.56,
+ "learning_rate": 5.599478307778333e-05,
+ "loss": 1.8284,
+ "step": 435
+ },
+ {
+ "epoch": 2.59,
+ "learning_rate": 4.9826718881055135e-05,
+ "loss": 1.7434,
+ "step": 440
+ },
+ {
+ "epoch": 2.62,
+ "learning_rate": 4.381882169956128e-05,
+ "loss": 1.6965,
+ "step": 445
+ },
+ {
+ "epoch": 2.65,
+ "learning_rate": 3.8022348793729525e-05,
+ "loss": 1.7405,
+ "step": 450
+ },
+ {
+ "epoch": 2.68,
+ "learning_rate": 3.24867536266134e-05,
+ "loss": 1.7292,
+ "step": 455
+ },
+ {
+ "epoch": 2.71,
+ "learning_rate": 2.7259263944382986e-05,
+ "loss": 1.7385,
+ "step": 460
+ },
+ {
+ "epoch": 2.74,
+ "learning_rate": 2.2384478845846205e-05,
+ "loss": 1.6668,
+ "step": 465
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 1.7903988278662788e-05,
+ "loss": 1.591,
+ "step": 470
+ },
+ {
+ "epoch": 2.79,
+ "learning_rate": 1.3856018208575617e-05,
+ "loss": 1.7083,
+ "step": 475
+ },
+ {
+ "epoch": 2.82,
+ "learning_rate": 1.0275104488948488e-05,
+ "loss": 1.6955,
+ "step": 480
+ },
+ {
+ "epoch": 2.85,
+ "learning_rate": 7.191798213042723e-06,
+ "loss": 1.7392,
+ "step": 485
+ },
+ {
+ "epoch": 2.88,
+ "learning_rate": 4.6324050628612214e-06,
+ "loss": 1.7917,
+ "step": 490
+ },
+ {
+ "epoch": 2.91,
+ "learning_rate": 2.6187608783446213e-06,
+ "loss": 1.7505,
+ "step": 495
+ },
+ {
+ "epoch": 2.94,
+ "learning_rate": 1.1680453616843376e-06,
+ "loss": 1.7224,
+ "step": 500
+ },
+ {
+ "epoch": 2.97,
+ "learning_rate": 2.926355061606279e-07,
+ "loss": 1.6373,
+ "step": 505
+ },
+ {
+ "epoch": 3.0,
+ "learning_rate": 0.0,
+ "loss": 1.6567,
+ "step": 510
+ },
+ {
+ "epoch": 3.0,
+ "eval_loss": 1.6859837770462036,
+ "eval_runtime": 10.9838,
+ "eval_samples_per_second": 22.761,
+ "eval_steps_per_second": 2.913,
+ "step": 510
 }
 ],
- "max_steps": 346,
- "num_train_epochs": 2,
- "total_flos": 359537836032000.0,
+ "max_steps": 510,
+ "num_train_epochs": 3,
+ "total_flos": 530422824960000.0,
 "trial_name": null,
 "trial_params": null
 }
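To check the updated training state without opening W&B, a short sketch (not part of this commit) that reads trainer_state.json from a local clone of the repo; the keys `best_metric` and `best_model_checkpoint` are visible in the hunk above, and `log_history` is the standard Hugging Face Trainer key holding the per-step entries it adds:

```python
# Sketch: assumes the repository has been cloned locally so that
# trainer_state.json is in the current directory.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

print("best eval loss:", state["best_metric"])             # 1.6859... after this commit
print("best checkpoint:", state["best_model_checkpoint"])  # output/boris-grebenshikov/checkpoint-510
print("last log entry:", state["log_history"][-1])         # the epoch-3.0 eval record
```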
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:a577c92f63c7982cffa4f5373df3f9b2218b8fba8ae03f5f9b92e96c6b2421a2
+ oid sha256:cf593eb68797dde3a11c261e20993644e6e8d1db2915dab364fbd4a5c0431556
 size 2735