AlekseyKorshuk
committed on
Commit
·
e03102c
1
Parent(s):
fef87f9
huggingartists
Browse files
- README.md +3 -3
- config.json +1 -1
- evaluation.txt +1 -1
- flax_model.msgpack +1 -1
- optimizer.pt +1 -1
- pytorch_model.bin +1 -1
- rng_state.pth +2 -2
- scheduler.pt +1 -1
- trainer_state.json +213 -7
- training_args.bin +1 -1
README.md
CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
|
|
45 |
dataset = load_dataset("huggingartists/boris-grebenshikov")
|
46 |
```
|
47 |
|
48 |
-
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Борис Гребенщиков (Boris Grebenshikov)'s lyrics.
|
53 |
|
54 |
-
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/
|
55 |
|
56 |
-
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/
|
57 |
|
58 |
## How to use
|
59 |
|
|
|
45 |
dataset = load_dataset("huggingartists/boris-grebenshikov")
|
46 |
```
|
47 |
|
48 |
+
[Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3usdyy9d/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
|
49 |
|
50 |
## Training procedure
|
51 |
|
52 |
The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Борис Гребенщиков (Boris Grebenshikov)'s lyrics.
|
53 |
|
54 |
+
Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1hazzf7o) for full transparency and reproducibility.
|
55 |
|
56 |
+
At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1hazzf7o/artifacts) is logged and versioned.
|
57 |
|
58 |
## How to use
|
59 |
|
config.json
CHANGED
@@ -35,7 +35,7 @@
|
|
35 |
}
|
36 |
},
|
37 |
"torch_dtype": "float32",
|
38 |
-
"transformers_version": "4.10.
|
39 |
"use_cache": true,
|
40 |
"vocab_size": 50257
|
41 |
}
|
|
|
35 |
}
|
36 |
},
|
37 |
"torch_dtype": "float32",
|
38 |
+
"transformers_version": "4.10.2",
|
39 |
"use_cache": true,
|
40 |
"vocab_size": 50257
|
41 |
}
|
evaluation.txt
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"eval_loss": 1.
|
|
|
1 |
+
{"eval_loss": 1.6859837770462036, "eval_runtime": 11.1348, "eval_samples_per_second": 22.452, "eval_steps_per_second": 2.874, "epoch": 3.0}
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 497764120
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2d13a88d644111679b0e771a0604a0cc7571d6c6d0aa4f447d2b3e3cd1d73673
|
3 |
size 497764120
|
optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 995604017
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6ad4d0daa593acffd671b9972c54cffa4ab3c9e35e5e977831f28162a0e3296
|
3 |
size 995604017
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 510403817
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:edcb7068016669cd7d3686ab784b0f9a1369a4fdc7e206c64f18a1eeb8f4b6d1
|
3 |
size 510403817
|
rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21247813c84eaba6dd1c8fa5e87e39109d5255edf8b9566147088529c2be57c7
|
3 |
+
size 14567
|
scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:87ba49cc9b309f097ae74c170e865f98bf5e5b8b130e72a509a3e15fdec2e6c1
|
3 |
size 623
|
trainer_state.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
-
"best_metric": 1.
|
3 |
-
"best_model_checkpoint": "output/boris-grebenshikov/checkpoint-
|
4 |
-
"epoch":
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -444,11 +444,217 @@
|
|
444 |
"eval_samples_per_second": 22.18,
|
445 |
"eval_steps_per_second": 2.821,
|
446 |
"step": 346
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
447 |
}
|
448 |
],
|
449 |
-
"max_steps":
|
450 |
-
"num_train_epochs":
|
451 |
-
"total_flos":
|
452 |
"trial_name": null,
|
453 |
"trial_params": null
|
454 |
}
|
|
|
1 |
{
|
2 |
+
"best_metric": 1.6859837770462036,
|
3 |
+
"best_model_checkpoint": "output/boris-grebenshikov/checkpoint-510",
|
4 |
+
"epoch": 3.0,
|
5 |
+
"global_step": 510,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
444 |
"eval_samples_per_second": 22.18,
|
445 |
"eval_steps_per_second": 2.821,
|
446 |
"step": 346
|
447 |
+
},
|
448 |
+
{
|
449 |
+
"epoch": 2.06,
|
450 |
+
"learning_rate": 0.00013603195463831566,
|
451 |
+
"loss": 1.8257,
|
452 |
+
"step": 350
|
453 |
+
},
|
454 |
+
{
|
455 |
+
"epoch": 2.09,
|
456 |
+
"learning_rate": 0.00013458123912165538,
|
457 |
+
"loss": 1.7795,
|
458 |
+
"step": 355
|
459 |
+
},
|
460 |
+
{
|
461 |
+
"epoch": 2.12,
|
462 |
+
"learning_rate": 0.00013256759493713883,
|
463 |
+
"loss": 1.6858,
|
464 |
+
"step": 360
|
465 |
+
},
|
466 |
+
{
|
467 |
+
"epoch": 2.15,
|
468 |
+
"learning_rate": 0.0001300082017869573,
|
469 |
+
"loss": 1.8715,
|
470 |
+
"step": 365
|
471 |
+
},
|
472 |
+
{
|
473 |
+
"epoch": 2.18,
|
474 |
+
"learning_rate": 0.00012692489551105156,
|
475 |
+
"loss": 1.7653,
|
476 |
+
"step": 370
|
477 |
+
},
|
478 |
+
{
|
479 |
+
"epoch": 2.21,
|
480 |
+
"learning_rate": 0.0001233439817914244,
|
481 |
+
"loss": 1.7267,
|
482 |
+
"step": 375
|
483 |
+
},
|
484 |
+
{
|
485 |
+
"epoch": 2.24,
|
486 |
+
"learning_rate": 0.0001192960117213372,
|
487 |
+
"loss": 1.7223,
|
488 |
+
"step": 380
|
489 |
+
},
|
490 |
+
{
|
491 |
+
"epoch": 2.26,
|
492 |
+
"learning_rate": 0.00011481552115415387,
|
493 |
+
"loss": 1.7739,
|
494 |
+
"step": 385
|
495 |
+
},
|
496 |
+
{
|
497 |
+
"epoch": 2.29,
|
498 |
+
"learning_rate": 0.00010994073605561706,
|
499 |
+
"loss": 1.7485,
|
500 |
+
"step": 390
|
501 |
+
},
|
502 |
+
{
|
503 |
+
"epoch": 2.32,
|
504 |
+
"learning_rate": 0.00010471324637338657,
|
505 |
+
"loss": 1.7833,
|
506 |
+
"step": 395
|
507 |
+
},
|
508 |
+
{
|
509 |
+
"epoch": 2.35,
|
510 |
+
"learning_rate": 9.917765120627052e-05,
|
511 |
+
"loss": 1.7504,
|
512 |
+
"step": 400
|
513 |
+
},
|
514 |
+
{
|
515 |
+
"epoch": 2.38,
|
516 |
+
"learning_rate": 9.338117830043871e-05,
|
517 |
+
"loss": 1.7411,
|
518 |
+
"step": 405
|
519 |
+
},
|
520 |
+
{
|
521 |
+
"epoch": 2.41,
|
522 |
+
"learning_rate": 8.737328111894491e-05,
|
523 |
+
"loss": 1.7067,
|
524 |
+
"step": 410
|
525 |
+
},
|
526 |
+
{
|
527 |
+
"epoch": 2.44,
|
528 |
+
"learning_rate": 8.120521692221671e-05,
|
529 |
+
"loss": 1.7285,
|
530 |
+
"step": 415
|
531 |
+
},
|
532 |
+
{
|
533 |
+
"epoch": 2.47,
|
534 |
+
"learning_rate": 7.492960945918252e-05,
|
535 |
+
"loss": 1.7692,
|
536 |
+
"step": 420
|
537 |
+
},
|
538 |
+
{
|
539 |
+
"epoch": 2.5,
|
540 |
+
"learning_rate": 6.860000000000001e-05,
|
541 |
+
"loss": 1.7354,
|
542 |
+
"step": 425
|
543 |
+
},
|
544 |
+
{
|
545 |
+
"epoch": 2.53,
|
546 |
+
"learning_rate": 6.227039054081752e-05,
|
547 |
+
"loss": 1.7446,
|
548 |
+
"step": 430
|
549 |
+
},
|
550 |
+
{
|
551 |
+
"epoch": 2.56,
|
552 |
+
"learning_rate": 5.599478307778333e-05,
|
553 |
+
"loss": 1.8284,
|
554 |
+
"step": 435
|
555 |
+
},
|
556 |
+
{
|
557 |
+
"epoch": 2.59,
|
558 |
+
"learning_rate": 4.9826718881055135e-05,
|
559 |
+
"loss": 1.7434,
|
560 |
+
"step": 440
|
561 |
+
},
|
562 |
+
{
|
563 |
+
"epoch": 2.62,
|
564 |
+
"learning_rate": 4.381882169956128e-05,
|
565 |
+
"loss": 1.6965,
|
566 |
+
"step": 445
|
567 |
+
},
|
568 |
+
{
|
569 |
+
"epoch": 2.65,
|
570 |
+
"learning_rate": 3.8022348793729525e-05,
|
571 |
+
"loss": 1.7405,
|
572 |
+
"step": 450
|
573 |
+
},
|
574 |
+
{
|
575 |
+
"epoch": 2.68,
|
576 |
+
"learning_rate": 3.24867536266134e-05,
|
577 |
+
"loss": 1.7292,
|
578 |
+
"step": 455
|
579 |
+
},
|
580 |
+
{
|
581 |
+
"epoch": 2.71,
|
582 |
+
"learning_rate": 2.7259263944382986e-05,
|
583 |
+
"loss": 1.7385,
|
584 |
+
"step": 460
|
585 |
+
},
|
586 |
+
{
|
587 |
+
"epoch": 2.74,
|
588 |
+
"learning_rate": 2.2384478845846205e-05,
|
589 |
+
"loss": 1.6668,
|
590 |
+
"step": 465
|
591 |
+
},
|
592 |
+
{
|
593 |
+
"epoch": 2.76,
|
594 |
+
"learning_rate": 1.7903988278662788e-05,
|
595 |
+
"loss": 1.591,
|
596 |
+
"step": 470
|
597 |
+
},
|
598 |
+
{
|
599 |
+
"epoch": 2.79,
|
600 |
+
"learning_rate": 1.3856018208575617e-05,
|
601 |
+
"loss": 1.7083,
|
602 |
+
"step": 475
|
603 |
+
},
|
604 |
+
{
|
605 |
+
"epoch": 2.82,
|
606 |
+
"learning_rate": 1.0275104488948488e-05,
|
607 |
+
"loss": 1.6955,
|
608 |
+
"step": 480
|
609 |
+
},
|
610 |
+
{
|
611 |
+
"epoch": 2.85,
|
612 |
+
"learning_rate": 7.191798213042723e-06,
|
613 |
+
"loss": 1.7392,
|
614 |
+
"step": 485
|
615 |
+
},
|
616 |
+
{
|
617 |
+
"epoch": 2.88,
|
618 |
+
"learning_rate": 4.6324050628612214e-06,
|
619 |
+
"loss": 1.7917,
|
620 |
+
"step": 490
|
621 |
+
},
|
622 |
+
{
|
623 |
+
"epoch": 2.91,
|
624 |
+
"learning_rate": 2.6187608783446213e-06,
|
625 |
+
"loss": 1.7505,
|
626 |
+
"step": 495
|
627 |
+
},
|
628 |
+
{
|
629 |
+
"epoch": 2.94,
|
630 |
+
"learning_rate": 1.1680453616843376e-06,
|
631 |
+
"loss": 1.7224,
|
632 |
+
"step": 500
|
633 |
+
},
|
634 |
+
{
|
635 |
+
"epoch": 2.97,
|
636 |
+
"learning_rate": 2.926355061606279e-07,
|
637 |
+
"loss": 1.6373,
|
638 |
+
"step": 505
|
639 |
+
},
|
640 |
+
{
|
641 |
+
"epoch": 3.0,
|
642 |
+
"learning_rate": 0.0,
|
643 |
+
"loss": 1.6567,
|
644 |
+
"step": 510
|
645 |
+
},
|
646 |
+
{
|
647 |
+
"epoch": 3.0,
|
648 |
+
"eval_loss": 1.6859837770462036,
|
649 |
+
"eval_runtime": 10.9838,
|
650 |
+
"eval_samples_per_second": 22.761,
|
651 |
+
"eval_steps_per_second": 2.913,
|
652 |
+
"step": 510
|
653 |
}
|
654 |
],
|
655 |
+
"max_steps": 510,
|
656 |
+
"num_train_epochs": 3,
|
657 |
+
"total_flos": 530422824960000.0,
|
658 |
"trial_name": null,
|
659 |
"trial_params": null
|
660 |
}
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 2735
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf593eb68797dde3a11c261e20993644e6e8d1db2915dab364fbd4a5c0431556
|
3 |
size 2735
|