RoyJoy commited on
Commit
e4aa62f
·
verified ·
1 Parent(s): abea9c3

Training in progress, step 81, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:523d69da257f0c85429a4ac40227e30f692220895a6861332b60df44372fd2ba
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37c912bb5d83edee2139227ce07a747d94f46f4244f92e2b75a2cf32ad0aa135
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f2fb1bb1538078e71fda652609cd9e6c24d406e49aae111fc74c94390e545cf
3
  size 671466706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:297634343be7a9bc6c9d93adea82a9c4bc9fd9453bb20ce9aff0eb62512979e2
3
  size 671466706
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a1c1ca2eb2d63320002b57411b027dbb181001114302d8b335c26f8674b2007
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6afe91f92ca570ec00fc0046b49d53bdab62ad236d2d8202ed8b87546871392c
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d222eced2b5b34720eced06dd5e0608c4941e9c1353fd156035abd0e5cf7b7ac
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66970dc56e78d2d5e63f5fbb36b724c778ac0af6cc13990a5b7eb3eebb254a9d
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:365729cdf68ba37e104e3c2d3021719ddcc569e3182cc6bc5243308e3d833e68
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c1b0f5d80f65e193095ce4aa52e70a0ead54bfd122929297ea8fdb2494e13cdb
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f44433a75e833bf28f18de1fb528eecd83ec14d73c346ae4ca18620fb6ee9df
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34c3c5787d72bf3b7ae0cfd936603b63e32f8cf1738300895a5f8328e31811bf
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:766d4898d9bcfdf7ea7b5b0fed47e9adbde3f4862283013c7956a2674af670bf
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0afdbd8ef2f5fe06163b573a80024a95f2ec653cc655cd639ecb8cceae52c1bb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.4470236301422119,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
- "epoch": 1.2402169701666022,
5
  "eval_steps": 25,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -381,6 +381,231 @@
381
  "eval_samples_per_second": 31.875,
382
  "eval_steps_per_second": 8.287,
383
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  }
385
  ],
386
  "logging_steps": 1,
@@ -404,12 +629,12 @@
404
  "should_evaluate": false,
405
  "should_log": false,
406
  "should_save": true,
407
- "should_training_stop": false
408
  },
409
  "attributes": {}
410
  }
411
  },
412
- "total_flos": 1.1938032281232343e+18,
413
  "train_batch_size": 1,
414
  "trial_name": null,
415
  "trial_params": null
 
1
  {
2
  "best_metric": 0.4470236301422119,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-50",
4
+ "epoch": 2.009298721425804,
5
  "eval_steps": 25,
6
+ "global_step": 81,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
381
  "eval_samples_per_second": 31.875,
382
  "eval_steps_per_second": 8.287,
383
  "step": 50
384
+ },
385
+ {
386
+ "epoch": 1.2650135606354127,
387
+ "grad_norm": 1.0695531368255615,
388
+ "learning_rate": 0.00011520337018925895,
389
+ "loss": 0.4534,
390
+ "step": 51
391
+ },
392
+ {
393
+ "epoch": 1.289810151104223,
394
+ "grad_norm": 0.326072633266449,
395
+ "learning_rate": 0.00011025408169024583,
396
+ "loss": 0.4319,
397
+ "step": 52
398
+ },
399
+ {
400
+ "epoch": 1.3146067415730336,
401
+ "grad_norm": 0.3365119993686676,
402
+ "learning_rate": 0.00010539135774085195,
403
+ "loss": 0.4362,
404
+ "step": 53
405
+ },
406
+ {
407
+ "epoch": 1.3394033320418441,
408
+ "grad_norm": 0.4222407042980194,
409
+ "learning_rate": 0.0001006228873071517,
410
+ "loss": 0.43,
411
+ "step": 54
412
+ },
413
+ {
414
+ "epoch": 1.3641999225106547,
415
+ "grad_norm": 0.35905617475509644,
416
+ "learning_rate": 9.595621032103801e-05,
417
+ "loss": 0.427,
418
+ "step": 55
419
+ },
420
+ {
421
+ "epoch": 1.3889965129794652,
422
+ "grad_norm": 0.26657891273498535,
423
+ "learning_rate": 9.139870575803991e-05,
424
+ "loss": 0.4208,
425
+ "step": 56
426
+ },
427
+ {
428
+ "epoch": 1.4137931034482758,
429
+ "grad_norm": 0.2459261119365692,
430
+ "learning_rate": 8.695757996964544e-05,
431
+ "loss": 0.4123,
432
+ "step": 57
433
+ },
434
+ {
435
+ "epoch": 1.4385896939170864,
436
+ "grad_norm": 0.3061677813529968,
437
+ "learning_rate": 8.263985528857921e-05,
438
+ "loss": 0.429,
439
+ "step": 58
440
+ },
441
+ {
442
+ "epoch": 1.463386284385897,
443
+ "grad_norm": 0.4884737432003021,
444
+ "learning_rate": 7.845235892505033e-05,
445
+ "loss": 0.4428,
446
+ "step": 59
447
+ },
448
+ {
449
+ "epoch": 1.4881828748547075,
450
+ "grad_norm": 0.3974491059780121,
451
+ "learning_rate": 7.440171217153001e-05,
452
+ "loss": 0.4357,
453
+ "step": 60
454
+ },
455
+ {
456
+ "epoch": 1.512979465323518,
457
+ "grad_norm": 0.31677502393722534,
458
+ "learning_rate": 7.049431993312759e-05,
459
+ "loss": 0.4063,
460
+ "step": 61
461
+ },
462
+ {
463
+ "epoch": 1.5377760557923286,
464
+ "grad_norm": 0.5034978985786438,
465
+ "learning_rate": 6.67363606001183e-05,
466
+ "loss": 0.416,
467
+ "step": 62
468
+ },
469
+ {
470
+ "epoch": 1.5625726462611391,
471
+ "grad_norm": 0.3409871757030487,
472
+ "learning_rate": 6.313377627863891e-05,
473
+ "loss": 0.406,
474
+ "step": 63
475
+ },
476
+ {
477
+ "epoch": 1.5873692367299497,
478
+ "grad_norm": 0.2753334939479828,
479
+ "learning_rate": 5.9692263394996376e-05,
480
+ "loss": 0.3993,
481
+ "step": 64
482
+ },
483
+ {
484
+ "epoch": 1.6121658271987602,
485
+ "grad_norm": 0.4815467596054077,
486
+ "learning_rate": 5.641726368844675e-05,
487
+ "loss": 0.4139,
488
+ "step": 65
489
+ },
490
+ {
491
+ "epoch": 1.6369624176675708,
492
+ "grad_norm": 0.42127665877342224,
493
+ "learning_rate": 5.3313955606687296e-05,
494
+ "loss": 0.4155,
495
+ "step": 66
496
+ },
497
+ {
498
+ "epoch": 1.6617590081363813,
499
+ "grad_norm": 0.29522252082824707,
500
+ "learning_rate": 5.038724611766585e-05,
501
+ "loss": 0.4038,
502
+ "step": 67
503
+ },
504
+ {
505
+ "epoch": 1.6865555986051919,
506
+ "grad_norm": 0.3209092319011688,
507
+ "learning_rate": 4.764176295065636e-05,
508
+ "loss": 0.4012,
509
+ "step": 68
510
+ },
511
+ {
512
+ "epoch": 1.7113521890740024,
513
+ "grad_norm": 0.4263613820075989,
514
+ "learning_rate": 4.508184727886723e-05,
515
+ "loss": 0.4195,
516
+ "step": 69
517
+ },
518
+ {
519
+ "epoch": 1.736148779542813,
520
+ "grad_norm": 0.42666929960250854,
521
+ "learning_rate": 4.271154685515435e-05,
522
+ "loss": 0.4198,
523
+ "step": 70
524
+ },
525
+ {
526
+ "epoch": 1.7609453700116235,
527
+ "grad_norm": 0.29947665333747864,
528
+ "learning_rate": 4.0534609611692115e-05,
529
+ "loss": 0.3954,
530
+ "step": 71
531
+ },
532
+ {
533
+ "epoch": 1.785741960480434,
534
+ "grad_norm": 0.3265860676765442,
535
+ "learning_rate": 3.855447773372175e-05,
536
+ "loss": 0.402,
537
+ "step": 72
538
+ },
539
+ {
540
+ "epoch": 1.8105385509492444,
541
+ "grad_norm": 0.4390391707420349,
542
+ "learning_rate": 3.6774282216749336e-05,
543
+ "loss": 0.3979,
544
+ "step": 73
545
+ },
546
+ {
547
+ "epoch": 1.835335141418055,
548
+ "grad_norm": 0.28207874298095703,
549
+ "learning_rate": 3.519683791579824e-05,
550
+ "loss": 0.3837,
551
+ "step": 74
552
+ },
553
+ {
554
+ "epoch": 1.8601317318868655,
555
+ "grad_norm": 0.3026869297027588,
556
+ "learning_rate": 3.382463909454464e-05,
557
+ "loss": 0.3973,
558
+ "step": 75
559
+ },
560
+ {
561
+ "epoch": 1.8601317318868655,
562
+ "eval_loss": 0.39713945984840393,
563
+ "eval_runtime": 1.5675,
564
+ "eval_samples_per_second": 31.898,
565
+ "eval_steps_per_second": 8.294,
566
+ "step": 75
567
+ },
568
+ {
569
+ "epoch": 1.884928322355676,
570
+ "grad_norm": 0.25843551754951477,
571
+ "learning_rate": 3.265985548137413e-05,
572
+ "loss": 0.3886,
573
+ "step": 76
574
+ },
575
+ {
576
+ "epoch": 1.9097249128244866,
577
+ "grad_norm": 0.23140764236450195,
578
+ "learning_rate": 3.1704328838595e-05,
579
+ "loss": 0.3856,
580
+ "step": 77
581
+ },
582
+ {
583
+ "epoch": 1.9345215032932972,
584
+ "grad_norm": 0.34332171082496643,
585
+ "learning_rate": 3.0959570050233586e-05,
586
+ "loss": 0.3945,
587
+ "step": 78
588
+ },
589
+ {
590
+ "epoch": 1.9593180937621077,
591
+ "grad_norm": 0.3156374990940094,
592
+ "learning_rate": 3.0426756733016063e-05,
593
+ "loss": 0.3956,
594
+ "step": 79
595
+ },
596
+ {
597
+ "epoch": 1.9841146842309183,
598
+ "grad_norm": 0.34009358286857605,
599
+ "learning_rate": 3.010673137431425e-05,
600
+ "loss": 0.4036,
601
+ "step": 80
602
+ },
603
+ {
604
+ "epoch": 2.009298721425804,
605
+ "grad_norm": 0.43867069482803345,
606
+ "learning_rate": 2.9999999999999997e-05,
607
+ "loss": 0.6494,
608
+ "step": 81
609
  }
610
  ],
611
  "logging_steps": 1,
 
629
  "should_evaluate": false,
630
  "should_log": false,
631
  "should_save": true,
632
+ "should_training_stop": true
633
  },
634
  "attributes": {}
635
  }
636
  },
637
+ "total_flos": 1.9339388490004562e+18,
638
  "train_batch_size": 1,
639
  "trial_name": null,
640
  "trial_params": null