HugoVoxx committed
Commit 8758510 (verified)
1 Parent(s): 82bcc4f

Upload 12 files
aglib/meliad/transformer/configs/size/large_1200M.gin ADDED
@@ -0,0 +1,7 @@
+
+# Number of parameters = 1208M
+NUM_LAYERS = 12
+EMBED_DIM = 2048
+NUM_HEADS = 32
+HEAD_DIM = 128
+MLP_DIM = 16384
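The "Number of parameters" comments in these size configs follow directly from the macros. A minimal back-of-the-envelope sketch, assuming the count covers only the per-layer attention and feed-forward weight matrices (no embeddings, layer norms, or biases, which is evidently the convention the comments use); the same formula reproduces the 605M, 302M, 151M, 75M, and 37M figures in the files below:

def transformer_params(num_layers, embed_dim, num_heads, head_dim, mlp_dim):
    # Q, K, V and output projections: 4 matrices of shape
    # (embed_dim, num_heads * head_dim) per layer.
    attn = 4 * embed_dim * num_heads * head_dim
    # Feed-forward up- and down-projections: 2 matrices of shape
    # (embed_dim, mlp_dim) per layer.
    mlp = 2 * embed_dim * mlp_dim
    return num_layers * (attn + mlp)

# large_1200M.gin: 12 * (4*2048*32*128 + 2*2048*16384) = 1,207,959,552 ~ 1208M
print(transformer_params(12, 2048, 32, 128, 16384))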
aglib/meliad/transformer/configs/size/large_600M.gin ADDED
@@ -0,0 +1,8 @@
+
+# Number of parameters = 605M
+NUM_LAYERS = 12
+EMBED_DIM = 2048
+NUM_HEADS = 16
+HEAD_DIM = 128
+MLP_DIM = 8192
+
aglib/meliad/transformer/configs/size/layer13.gin ADDED
@@ -0,0 +1,4 @@
+
+# Use 13 layers, for comparison against recurrent transformers.
+
+NUM_LAYERS = 13
aglib/meliad/transformer/configs/size/layer24.gin ADDED
@@ -0,0 +1,4 @@
+
+# Tall configuration, which doubles the number of layers and parameters.
+
+NUM_LAYERS = 24
aglib/meliad/transformer/configs/size/layer26.gin ADDED
@@ -0,0 +1,4 @@
+
+# Use 26 layers, for comparison against tall recurrent transformers.
+
+NUM_LAYERS = 26
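The layer13/layer24/layer26 files set only the NUM_LAYERS macro and are meant to be layered on top of one of the size configs: gin applies config files in order, so the macro definition in the later file wins. A minimal sketch of that composition with the gin-config API (skip_unknown=True is an assumption of this illustration, used so meliad's configurables need not be imported; paths are relative to the repo root):

import gin

gin.parse_config_files_and_bindings(
    config_files=[
        "aglib/meliad/transformer/configs/size/medium_150M.gin",
        "aglib/meliad/transformer/configs/size/layer24.gin",  # overrides NUM_LAYERS
    ],
    bindings=None,
    skip_unknown=True,
)
print(gin.query_parameter("%NUM_LAYERS"))  # -> 24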
aglib/meliad/transformer/configs/size/medium_150M.gin ADDED
@@ -0,0 +1,8 @@
+
+# Number of parameters = 151M
+NUM_LAYERS = 12
+EMBED_DIM = 1024
+NUM_HEADS = 8
+HEAD_DIM = 128
+MLP_DIM = 4096
+
aglib/meliad/transformer/configs/size/medium_300M.gin ADDED
@@ -0,0 +1,8 @@
+
+# Number of parameters = 302M
+NUM_LAYERS = 12
+EMBED_DIM = 1024
+NUM_HEADS = 16
+HEAD_DIM = 128
+MLP_DIM = 8192
+
aglib/meliad/transformer/configs/size/small.gin ADDED
@@ -0,0 +1,7 @@
+
+NUM_LAYERS = 6
+EMBED_DIM = 512
+NUM_HEADS = 8
+HEAD_DIM = 128
+MLP_DIM = 2048
+
aglib/meliad/transformer/configs/size/small_37M.gin ADDED
@@ -0,0 +1,8 @@
+
+# Number of parameters = 37M.
+NUM_LAYERS = 12
+EMBED_DIM = 512
+NUM_HEADS = 8
+HEAD_DIM = 64
+MLP_DIM = 2048
+
aglib/meliad/transformer/configs/size/small_75M.gin ADDED
@@ -0,0 +1,8 @@
+
+# Number of parameters = 75M
+NUM_LAYERS = 12
+EMBED_DIM = 512
+NUM_HEADS = 8
+HEAD_DIM = 128
+MLP_DIM = 4096
+
aglib/meliad/transformer/configs/size/small_test.gin ADDED
@@ -0,0 +1,37 @@
+# Small config for testing purposes
+
+NUM_LAYERS = 6
+EMBED_DIM = 512
+NUM_HEADS = 8
+HEAD_DIM = 128
+MLP_DIM = 2048
+DROPOUT_RATE = 0.1
+ATTN_DROPOUT_RATE = 0.1
+
+decoder_stack.TransformerTaskConfig:
+  sequence_length = 512
+  batch_size = 2
+
+transformer_layer.TransformerLayer:
+  window_length = 256
+  use_long_xl_architecture = True
+  max_unrolled_windows = -1
+  recurrent_num_states = 384  # Odd number for debugging purposes.
+  recurrent_gate_type = "bias"
+  recurrent_single_gate = False
+  recurrent_skip_ffn = True
+
+decoder_stack.DecoderStack:
+  dstack_window_length = 0
+  recurrent_layer_indices = ()  # (-1,)
+  feedback_recurrence = False
+
+training_loop.Trainer:
+  num_steps = 10_000
+  status_every_steps = 5
+  log_every_steps = 20
+  test_every_steps = 50
+  num_test_steps = 2
+  generate_every_steps = 100
+  print_input_every_steps = 100
+  checkpoint_every_steps = 200
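Unlike the plain size files, small_test.gin also binds task, layer, and trainer parameters, so it can drive a short run by itself. A sketch of shrinking it further for a smoke test; the override strings use gin's dotted binding form, which is equivalent to the indented blocks in the file:

import gin

gin.parse_config_files_and_bindings(
    ["aglib/meliad/transformer/configs/size/small_test.gin"],
    bindings=[
        "decoder_stack.TransformerTaskConfig.batch_size = 1",
        "training_loop.Trainer.num_steps = 100",
    ],
    skip_unknown=True,  # avoid importing meliad's modules for this sketch
)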
aglib/meliad/transformer/configs/size/tiny_test.gin ADDED
@@ -0,0 +1,27 @@
+# Tiny config for testing purposes.
+
+NUM_LAYERS = 2
+EMBED_DIM = 128
+NUM_HEADS = 4
+HEAD_DIM = 32
+MLP_DIM = 256
+DROPOUT_RATE = 0.1
+ATTN_DROPOUT_RATE = 0.1
+
+decoder_stack.TransformerTaskConfig:
+  sequence_length = 256
+  batch_size = 1
+
+transformer_layer.TransformerLayer:
+  window_length = 128
+  use_long_xl_architecture = True
+
+training_loop.Trainer:
+  num_steps = 1000
+  warmup_steps = 100
+  log_every_steps = 10
+  test_every_steps = 10
+  num_test_steps = 1
+  generate_every_steps = 100
+  print_input_every_steps = 100
+  checkpoint_every_steps = 100
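The NUM_LAYERS, EMBED_DIM, and similar macros defined across all twelve files are consumed elsewhere in meliad's base configs through gin's %MACRO references. A self-contained sketch of that mechanism, using a stand-in configurable rather than meliad's actual model builder:

import gin

@gin.configurable
def build_model(num_layers=gin.REQUIRED, embed_dim=gin.REQUIRED):
    # Both arguments are supplied by the gin bindings below.
    return {"num_layers": num_layers, "embed_dim": embed_dim}

gin.parse_config("""
NUM_LAYERS = 2
EMBED_DIM = 128
build_model.num_layers = %NUM_LAYERS
build_model.embed_dim = %EMBED_DIM
""")
print(build_model())  # {'num_layers': 2, 'embed_dim': 128}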