Upload 2 files
Browse files
rwkv-laws_ctx512_l12_d1024_lr44e-5.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ef5b748a27eec492da68884d2b080532ffe157492d555a2cd2c360e08fa63a03
|
3 |
+
size 394576931
|
rwkv-laws_ctx512_l12_d1024_lr44e-5.txt
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
NEW RUN 2023-02-23-20-04-17
|
2 |
+
0 2.520293 12.4322 0.00079302 2023-02-23 20:47:51.418890 0
|
3 |
+
1 1.819570 6.1692 0.00078610 2023-02-23 21:24:04.065679 1
|
4 |
+
2 1.663320 5.2768 0.00077924 2023-02-23 22:00:16.611902 2
|
5 |
+
3 1.570898 4.8110 0.00077244 2023-02-23 22:36:28.766211 3
|
6 |
+
4 1.504258 4.5008 0.00076570 2023-02-23 23:12:41.231950 4
|
7 |
+
5 1.456924 4.2927 0.00075902 2023-02-23 23:48:53.668959 5
|
8 |
+
6 1.422939 4.1493 0.00075240 2023-02-24 00:25:06.628034 6
|
9 |
+
7 1.385137 3.9954 0.00074583 2023-02-24 01:01:19.034342 7
|
10 |
+
8 1.356396 3.8822 0.00073932 2023-02-24 01:37:31.416777 8
|
11 |
+
9 1.330225 3.7819 0.00073287 2023-02-24 02:13:43.909189 9
|
12 |
+
10 1.311406 3.7114 0.00072648 2023-02-24 02:49:56.072533 10
|
13 |
+
11 1.290562 3.6348 0.00072014 2023-02-24 03:26:08.656643 11
|
14 |
+
12 1.275762 3.5814 0.00071385 2023-02-24 04:02:20.421938 12
|
15 |
+
13 1.250186 3.4910 0.00070763 2023-02-24 04:38:34.029029 13
|
16 |
+
14 1.240928 3.4588 0.00070145 2023-02-24 05:14:47.098515 14
|
17 |
+
15 1.228320 3.4155 0.00069533 2023-02-24 05:51:01.040499 15
|
18 |
+
16 1.207051 3.3436 0.00068926 2023-02-24 06:27:14.179952 16
|
19 |
+
17 1.202168 3.3273 0.00068325 2023-02-24 07:03:32.193153 17
|
20 |
+
|
21 |
+
NEW RUN 2023-02-24-07-15-23
|
22 |
+
{'load_model': '../models/rwkv-17.pth', 'proj_dir': '../models', 'random_seed': -1, 'data_file': '../laws/laws_text_document', 'tokenizer': 'sentencepiece', 'vocab_file': '../laws/sp_txt_16384.model', 'vocab_size': 16384, 'ctx_len': 512, 'epoch_steps': 1200, 'epoch_count': 500, 'epoch_begin': 0, 'epoch_save': 1, 'micro_bsz': 80, 'n_layer': 12, 'n_embd': 1024, 'lr_init': 0.00068, 'lr_final': 1e-05, 'warmup_steps': 0, 'beta1': 0.95, 'beta2': 0.98, 'adam_eps': 1e-08, 'grad_cp': 1, 'my_pile_edecay': 0, 'layerwise_lr': 1, 'ds_bucket_mb': 200, 'logger': False, 'enable_checkpointing': False, 'default_root_dir': None, 'gradient_clip_val': 1.0, 'gradient_clip_algorithm': None, 'num_nodes': 1, 'num_processes': None, 'devices': '1', 'gpus': None, 'auto_select_gpus': None, 'tpu_cores': None, 'ipus': None, 'enable_progress_bar': True, 'overfit_batches': 0.0, 'track_grad_norm': -1, 'check_val_every_n_epoch': 100000000000000000000, 'fast_dev_run': False, 'accumulate_grad_batches': None, 'max_epochs': -1, 'min_epochs': None, 'max_steps': -1, 'min_steps': None, 'max_time': None, 'limit_train_batches': None, 'limit_val_batches': None, 'limit_test_batches': None, 'limit_predict_batches': None, 'val_check_interval': None, 'log_every_n_steps': 100000000000000000000, 'accelerator': 'gpu', 'strategy': 'deepspeed', 'sync_batchnorm': False, 'precision': 'bf16', 'enable_model_summary': True, 'num_sanity_val_steps': 0, 'resume_from_checkpoint': None, 'profiler': None, 'benchmark': None, 'reload_dataloaders_every_n_epochs': 0, 'auto_lr_find': False, 'replace_sampler_ddp': False, 'detect_anomaly': False, 'auto_scale_batch_size': False, 'plugins': None, 'amp_backend': None, 'amp_level': None, 'move_metrics_to_cpu': False, 'multiple_trainloader_mode': 'max_size_cycle', 'inference_mode': True, 'my_timestamp': '2023-02-24-07-15-23', 'betas': (0.95, 0.98), 'real_bsz': 80, 'run_name': '0 ctx512 L12 D1024'}
|
23 |
+
|
24 |
+
{'zero_allow_untested_optimizer': True, 'zero_optimization': {'stage': 2, 'contiguous_gradients': True, 'overlap_comm': True, 'allgather_partitions': True, 'reduce_scatter': True, 'allgather_bucket_size': 200000000, 'reduce_bucket_size': 200000000, 'sub_group_size': 1000000000000}, 'activation_checkpointing': {'partition_activations': False, 'cpu_checkpointing': False, 'contiguous_memory_optimization': False, 'synchronize_checkpoint_boundary': False}, 'aio': {'block_size': 1048576, 'queue_depth': 8, 'single_submit': False, 'overlap_events': True, 'thread_count': 1}, 'gradient_accumulation_steps': 1, 'train_micro_batch_size_per_gpu': 72, 'gradient_clipping': 1.0, 'bf16': {'enabled': True}, 'compression_training': {'weight_quantization': {'shared_parameters': {}, 'different_groups': {}}, 'activation_quantization': {'shared_parameters': {}, 'different_groups': {}}, 'sparse_pruning': {'shared_parameters': {}, 'different_groups': {}}, 'row_pruning': {'shared_parameters': {}, 'different_groups': {}}, 'head_pruning': {'shared_parameters': {}, 'different_groups': {}}, 'channel_pruning': {'shared_parameters': {}, 'different_groups': {}}}}
|
25 |
+
|
26 |
+
0 1.181022 3.2577 0.00067429 2023-02-24 08:21:11.122989 0
|
27 |
+
1 1.153398 3.1689 0.00066862 2023-02-24 09:21:17.552559 1
|
28 |
+
2 1.139333 3.1247 0.00066300 2023-02-24 10:23:47.337609 2
|
29 |
+
3 1.126719 3.0855 0.00065743 2023-02-24 11:31:01.692680 3
|
30 |
+
4 1.112210 3.0411 0.00065190 2023-02-24 12:38:29.066986 4
|
31 |
+
5 1.097311 2.9961 0.00064643 2023-02-24 13:46:14.718587 5
|
32 |
+
6 1.088249 2.9691 0.00064099 2023-02-24 15:00:07.427120 6
|
33 |
+
7 1.075518 2.9315 0.00063561 2023-02-24 16:15:02.688796 7
|
34 |
+
8 1.065625 2.9027 0.00063027 2023-02-24 17:29:37.521291 8
|
35 |
+
9 1.052129 2.8637 0.00062497 2023-02-24 18:44:15.401113 9
|
36 |
+
10 1.043646 2.8396 0.00061972 2023-02-24 19:59:00.800355 10
|
37 |
+
11 1.036712 2.8199 0.00061451 2023-02-24 21:13:33.167569 11
|
38 |
+
12 1.031719 2.8059 0.00060935 2023-02-24 22:28:05.710493 12
|
39 |
+
13 1.015199 2.7599 0.00060422 2023-02-24 23:42:38.805873 13
|
40 |
+
14 1.015088 2.7596 0.00059915 2023-02-25 00:57:11.797412 14
|
41 |
+
15 1.001426 2.7222 0.00059411 2023-02-25 02:11:45.660196 15
|
42 |
+
16 0.994893 2.7044 0.00058912 2023-02-25 03:26:18.603743 16
|
43 |
+
17 0.988011 2.6859 0.00058417 2023-02-25 04:40:52.415416 17
|
44 |
+
18 0.980583 2.6660 0.00057926 2023-02-25 05:55:25.097743 18
|
45 |
+
19 0.976156 2.6542 0.00057439 2023-02-25 07:10:08.750546 19
|
46 |
+
20 0.967044 2.6302 0.00056957 2023-02-25 08:24:09.951182 20
|
47 |
+
21 0.962083 2.6171 0.00056478 2023-02-25 09:31:38.754236 21
|
48 |
+
22 0.954681 2.5978 0.00056003 2023-02-25 10:39:00.171111 22
|
49 |
+
23 0.952402 2.5919 0.00055533 2023-02-25 11:46:20.039013 23
|
50 |
+
24 0.946331 2.5762 0.00055066 2023-02-25 12:53:38.897788 24
|
51 |
+
25 0.938929 2.5572 0.00054603 2023-02-25 14:00:59.019809 25
|
52 |
+
26 0.933932 2.5445 0.00054144 2023-02-25 15:08:24.585025 26
|
53 |
+
27 0.928662 2.5311 0.00053689 2023-02-25 16:15:49.109635 27
|
54 |
+
28 0.920527 2.5106 0.00053238 2023-02-25 17:23:08.213674 28
|
55 |
+
29 0.921217 2.5123 0.00052791 2023-02-25 18:30:28.765746 29
|
56 |
+
30 0.912321 2.4901 0.00052347 2023-02-25 19:40:13.607648 30
|
57 |
+
31 0.908691 2.4811 0.00051907 2023-02-25 20:50:18.471005 31
|
58 |
+
32 0.904814 2.4715 0.00051471 2023-02-25 22:00:29.135635 32
|
59 |
+
33 0.899443 2.4582 0.00051039 2023-02-25 23:10:40.333796 33
|
60 |
+
34 0.892103 2.4403 0.00050610 2023-02-26 00:20:44.211534 34
|
61 |
+
35 0.890879 2.4373 0.00050184 2023-02-26 01:30:47.505920 35
|
62 |
+
36 0.885182 2.4234 0.00049763 2023-02-26 02:40:50.286844 36
|
63 |
+
37 0.879235 2.4091 0.00049344 2023-02-26 03:50:53.387048 37
|
64 |
+
38 0.880928 2.4131 0.00048930 2023-02-26 05:00:56.710483 38
|
65 |
+
39 0.876188 2.4017 0.00048519 2023-02-26 06:10:59.181386 39
|
66 |
+
40 0.872161 2.3921 0.00048111 2023-02-26 07:21:22.994460 40
|
67 |
+
41 0.870189 2.3874 0.00047707 2023-02-26 08:31:40.223079 41
|
68 |
+
42 0.862402 2.3688 0.00047306 2023-02-26 09:41:54.254247 42
|
69 |
+
43 0.857510 2.3573 0.00046908 2023-02-26 10:52:08.245272 43
|
70 |
+
44 0.856999 2.3561 0.00046514 2023-02-26 12:02:22.399308 44
|
71 |
+
45 0.853079 2.3469 0.00046123 2023-02-26 13:12:37.250985 45
|
72 |
+
46 0.851484 2.3431 0.00045735 2023-02-26 14:22:50.660896 46
|
73 |
+
47 0.843223 2.3238 0.00045351 2023-02-26 15:33:05.060451 47
|
74 |
+
48 0.840954 2.3186 0.00044970 2023-02-26 16:45:37.159289 48
|
75 |
+
49 0.838571 2.3131 0.00044592 2023-02-26 17:59:17.895446 49
|