tiendung committed on
Commit
ed7d67b
·
1 Parent(s): a731ee0

Upload 2 files

Browse files
rwkv-laws_ctx512_l12_d1024_lr44e-5.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef5b748a27eec492da68884d2b080532ffe157492d555a2cd2c360e08fa63a03
3
+ size 394576931
rwkv-laws_ctx512_l12_d1024_lr44e-5.txt ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ NEW RUN 2023-02-23-20-04-17
2
+ 0 2.520293 12.4322 0.00079302 2023-02-23 20:47:51.418890 0
3
+ 1 1.819570 6.1692 0.00078610 2023-02-23 21:24:04.065679 1
4
+ 2 1.663320 5.2768 0.00077924 2023-02-23 22:00:16.611902 2
5
+ 3 1.570898 4.8110 0.00077244 2023-02-23 22:36:28.766211 3
6
+ 4 1.504258 4.5008 0.00076570 2023-02-23 23:12:41.231950 4
7
+ 5 1.456924 4.2927 0.00075902 2023-02-23 23:48:53.668959 5
8
+ 6 1.422939 4.1493 0.00075240 2023-02-24 00:25:06.628034 6
9
+ 7 1.385137 3.9954 0.00074583 2023-02-24 01:01:19.034342 7
10
+ 8 1.356396 3.8822 0.00073932 2023-02-24 01:37:31.416777 8
11
+ 9 1.330225 3.7819 0.00073287 2023-02-24 02:13:43.909189 9
12
+ 10 1.311406 3.7114 0.00072648 2023-02-24 02:49:56.072533 10
13
+ 11 1.290562 3.6348 0.00072014 2023-02-24 03:26:08.656643 11
14
+ 12 1.275762 3.5814 0.00071385 2023-02-24 04:02:20.421938 12
15
+ 13 1.250186 3.4910 0.00070763 2023-02-24 04:38:34.029029 13
16
+ 14 1.240928 3.4588 0.00070145 2023-02-24 05:14:47.098515 14
17
+ 15 1.228320 3.4155 0.00069533 2023-02-24 05:51:01.040499 15
18
+ 16 1.207051 3.3436 0.00068926 2023-02-24 06:27:14.179952 16
19
+ 17 1.202168 3.3273 0.00068325 2023-02-24 07:03:32.193153 17
20
+
21
+ NEW RUN 2023-02-24-07-15-23
22
+ {'load_model': '../models/rwkv-17.pth', 'proj_dir': '../models', 'random_seed': -1, 'data_file': '../laws/laws_text_document', 'tokenizer': 'sentencepiece', 'vocab_file': '../laws/sp_txt_16384.model', 'vocab_size': 16384, 'ctx_len': 512, 'epoch_steps': 1200, 'epoch_count': 500, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 80, 'n_layer': 12, 'n_embd': 1024, 'lr_init': 0.00068, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.95, 'beta2': 0.98, 'adam_eps': 1e-08, 'grad_cp': 1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2023-02-24-07-15-23', 'betas': (0.95, 0.98), 'real_bsz': 80, 'run_name': '0 ctx512 L12 D1024'}
23
+
24
+ {'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 1, 'train_micro_batch_size_per_gpu': 72, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}, 'compression_training': {'weight_quantization': {'shared_parameters': {}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {}, 'different_groups': {}}}}
25
+
26
+ 0 1.181022 3.2577 0.00067429 2023-02-24 08:21:11.122989 0
27
+ 1 1.153398 3.1689 0.00066862 2023-02-24 09:21:17.552559 1
28
+ 2 1.139333 3.1247 0.00066300 2023-02-24 10:23:47.337609 2
29
+ 3 1.126719 3.0855 0.00065743 2023-02-24 11:31:01.692680 3
30
+ 4 1.112210 3.0411 0.00065190 2023-02-24 12:38:29.066986 4
31
+ 5 1.097311 2.9961 0.00064643 2023-02-24 13:46:14.718587 5
32
+ 6 1.088249 2.9691 0.00064099 2023-02-24 15:00:07.427120 6
33
+ 7 1.075518 2.9315 0.00063561 2023-02-24 16:15:02.688796 7
34
+ 8 1.065625 2.9027 0.00063027 2023-02-24 17:29:37.521291 8
35
+ 9 1.052129 2.8637 0.00062497 2023-02-24 18:44:15.401113 9
36
+ 10 1.043646 2.8396 0.00061972 2023-02-24 19:59:00.800355 10
37
+ 11 1.036712 2.8199 0.00061451 2023-02-24 21:13:33.167569 11
38
+ 12 1.031719 2.8059 0.00060935 2023-02-24 22:28:05.710493 12
39
+ 13 1.015199 2.7599 0.00060422 2023-02-24 23:42:38.805873 13
40
+ 14 1.015088 2.7596 0.00059915 2023-02-25 00:57:11.797412 14
41
+ 15 1.001426 2.7222 0.00059411 2023-02-25 02:11:45.660196 15
42
+ 16 0.994893 2.7044 0.00058912 2023-02-25 03:26:18.603743 16
43
+ 17 0.988011 2.6859 0.00058417 2023-02-25 04:40:52.415416 17
44
+ 18 0.980583 2.6660 0.00057926 2023-02-25 05:55:25.097743 18
45
+ 19 0.976156 2.6542 0.00057439 2023-02-25 07:10:08.750546 19
46
+ 20 0.967044 2.6302 0.00056957 2023-02-25 08:24:09.951182 20
47
+ 21 0.962083 2.6171 0.00056478 2023-02-25 09:31:38.754236 21
48
+ 22 0.954681 2.5978 0.00056003 2023-02-25 10:39:00.171111 22
49
+ 23 0.952402 2.5919 0.00055533 2023-02-25 11:46:20.039013 23
50
+ 24 0.946331 2.5762 0.00055066 2023-02-25 12:53:38.897788 24
51
+ 25 0.938929 2.5572 0.00054603 2023-02-25 14:00:59.019809 25
52
+ 26 0.933932 2.5445 0.00054144 2023-02-25 15:08:24.585025 26
53
+ 27 0.928662 2.5311 0.00053689 2023-02-25 16:15:49.109635 27
54
+ 28 0.920527 2.5106 0.00053238 2023-02-25 17:23:08.213674 28
55
+ 29 0.921217 2.5123 0.00052791 2023-02-25 18:30:28.765746 29
56
+ 30 0.912321 2.4901 0.00052347 2023-02-25 19:40:13.607648 30
57
+ 31 0.908691 2.4811 0.00051907 2023-02-25 20:50:18.471005 31
58
+ 32 0.904814 2.4715 0.00051471 2023-02-25 22:00:29.135635 32
59
+ 33 0.899443 2.4582 0.00051039 2023-02-25 23:10:40.333796 33
60
+ 34 0.892103 2.4403 0.00050610 2023-02-26 00:20:44.211534 34
61
+ 35 0.890879 2.4373 0.00050184 2023-02-26 01:30:47.505920 35
62
+ 36 0.885182 2.4234 0.00049763 2023-02-26 02:40:50.286844 36
63
+ 37 0.879235 2.4091 0.00049344 2023-02-26 03:50:53.387048 37
64
+ 38 0.880928 2.4131 0.00048930 2023-02-26 05:00:56.710483 38
65
+ 39 0.876188 2.4017 0.00048519 2023-02-26 06:10:59.181386 39
66
+ 40 0.872161 2.3921 0.00048111 2023-02-26 07:21:22.994460 40
67
+ 41 0.870189 2.3874 0.00047707 2023-02-26 08:31:40.223079 41
68
+ 42 0.862402 2.3688 0.00047306 2023-02-26 09:41:54.254247 42
69
+ 43 0.857510 2.3573 0.00046908 2023-02-26 10:52:08.245272 43
70
+ 44 0.856999 2.3561 0.00046514 2023-02-26 12:02:22.399308 44
71
+ 45 0.853079 2.3469 0.00046123 2023-02-26 13:12:37.250985 45
72
+ 46 0.851484 2.3431 0.00045735 2023-02-26 14:22:50.660896 46
73
+ 47 0.843223 2.3238 0.00045351 2023-02-26 15:33:05.060451 47
74
+ 48 0.840954 2.3186 0.00044970 2023-02-26 16:45:37.159289 48
75
+ 49 0.838571 2.3131 0.00044592 2023-02-26 17:59:17.895446 49