Spaces:
Sleeping
Sleeping
Upload 12 files
Browse files
- aglib/meliad/transformer/configs/size/large_1200M.gin +7 -0
- aglib/meliad/transformer/configs/size/large_600M.gin +8 -0
- aglib/meliad/transformer/configs/size/layer13.gin +4 -0
- aglib/meliad/transformer/configs/size/layer24.gin +4 -0
- aglib/meliad/transformer/configs/size/layer26.gin +4 -0
- aglib/meliad/transformer/configs/size/medium_150M.gin +8 -0
- aglib/meliad/transformer/configs/size/medium_300M.gin +8 -0
- aglib/meliad/transformer/configs/size/small.gin +7 -0
- aglib/meliad/transformer/configs/size/small_37M.gin +8 -0
- aglib/meliad/transformer/configs/size/small_75M.gin +8 -0
- aglib/meliad/transformer/configs/size/small_test.gin +37 -0
- aglib/meliad/transformer/configs/size/tiny_test.gin +27 -0
aglib/meliad/transformer/configs/size/large_1200M.gin
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Number of parameters = 1208M
|
3 |
+
NUM_LAYERS = 12
|
4 |
+
EMBED_DIM = 2048
|
5 |
+
NUM_HEADS = 32
|
6 |
+
HEAD_DIM = 128
|
7 |
+
MLP_DIM = 16384
|
aglib/meliad/transformer/configs/size/large_600M.gin
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Number of parameters = 605M
|
3 |
+
NUM_LAYERS = 12
|
4 |
+
EMBED_DIM = 2048
|
5 |
+
NUM_HEADS = 16
|
6 |
+
HEAD_DIM = 128
|
7 |
+
MLP_DIM = 8192
|
8 |
+
|
aglib/meliad/transformer/configs/size/layer13.gin
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Use 13 layers, for comparison against recurrent transformers.
|
3 |
+
|
4 |
+
NUM_LAYERS = 13
|
aglib/meliad/transformer/configs/size/layer24.gin
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Tall configuration, which doubles the number of layers and parameters.
|
3 |
+
|
4 |
+
NUM_LAYERS = 24
|
aglib/meliad/transformer/configs/size/layer26.gin
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Use 26 layers, for comparison against tall recurrent transformers.
|
3 |
+
|
4 |
+
NUM_LAYERS = 26
|
aglib/meliad/transformer/configs/size/medium_150M.gin
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Number of parameters = 151M
|
3 |
+
NUM_LAYERS = 12
|
4 |
+
EMBED_DIM = 1024
|
5 |
+
NUM_HEADS = 8
|
6 |
+
HEAD_DIM = 128
|
7 |
+
MLP_DIM = 4096
|
8 |
+
|
aglib/meliad/transformer/configs/size/medium_300M.gin
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Number of parameters = 302M
|
3 |
+
NUM_LAYERS = 12
|
4 |
+
EMBED_DIM = 1024
|
5 |
+
NUM_HEADS = 16
|
6 |
+
HEAD_DIM = 128
|
7 |
+
MLP_DIM = 8192
|
8 |
+
|
aglib/meliad/transformer/configs/size/small.gin
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
NUM_LAYERS = 6
|
3 |
+
EMBED_DIM = 512
|
4 |
+
NUM_HEADS = 8
|
5 |
+
HEAD_DIM = 128
|
6 |
+
MLP_DIM = 2048
|
7 |
+
|
aglib/meliad/transformer/configs/size/small_37M.gin
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Number of parameters = 37M.
|
3 |
+
NUM_LAYERS = 12
|
4 |
+
EMBED_DIM = 512
|
5 |
+
NUM_HEADS = 8
|
6 |
+
HEAD_DIM = 64
|
7 |
+
MLP_DIM = 2048
|
8 |
+
|
aglib/meliad/transformer/configs/size/small_75M.gin
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
# Number of parameters = 75M
|
3 |
+
NUM_LAYERS = 12
|
4 |
+
EMBED_DIM = 512
|
5 |
+
NUM_HEADS = 8
|
6 |
+
HEAD_DIM = 128
|
7 |
+
MLP_DIM = 4096
|
8 |
+
|
aglib/meliad/transformer/configs/size/small_test.gin
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Small config for testing purposes
|
2 |
+
|
3 |
+
NUM_LAYERS = 6
|
4 |
+
EMBED_DIM = 512
|
5 |
+
NUM_HEADS = 8
|
6 |
+
HEAD_DIM = 128
|
7 |
+
MLP_DIM = 2048
|
8 |
+
DROPOUT_RATE = 0.1
|
9 |
+
ATTN_DROPOUT_RATE = 0.1
|
10 |
+
|
11 |
+
decoder_stack.TransformerTaskConfig:
|
12 |
+
sequence_length = 512
|
13 |
+
batch_size = 2
|
14 |
+
|
15 |
+
transformer_layer.TransformerLayer:
|
16 |
+
window_length = 256
|
17 |
+
use_long_xl_architecture = True
|
18 |
+
max_unrolled_windows = -1
|
19 |
+
recurrent_num_states = 384 # Unusual value (not a power of two), for debugging purposes.
|
20 |
+
recurrent_gate_type = "bias"
|
21 |
+
recurrent_single_gate = False
|
22 |
+
recurrent_skip_ffn = True
|
23 |
+
|
24 |
+
decoder_stack.DecoderStack:
|
25 |
+
dstack_window_length = 0
|
26 |
+
recurrent_layer_indices = () # (-1,)
|
27 |
+
feedback_recurrence = False
|
28 |
+
|
29 |
+
training_loop.Trainer:
|
30 |
+
num_steps = 10_000
|
31 |
+
status_every_steps = 5
|
32 |
+
log_every_steps = 20
|
33 |
+
test_every_steps = 50
|
34 |
+
num_test_steps = 2
|
35 |
+
generate_every_steps = 100
|
36 |
+
print_input_every_steps = 100
|
37 |
+
checkpoint_every_steps = 200
|
aglib/meliad/transformer/configs/size/tiny_test.gin
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Tiny config for testing purposes.
|
2 |
+
|
3 |
+
NUM_LAYERS = 2
|
4 |
+
EMBED_DIM = 128
|
5 |
+
NUM_HEADS = 4
|
6 |
+
HEAD_DIM = 32
|
7 |
+
MLP_DIM = 256
|
8 |
+
DROPOUT_RATE = 0.1
|
9 |
+
ATTN_DROPOUT_RATE = 0.1
|
10 |
+
|
11 |
+
decoder_stack.TransformerTaskConfig:
|
12 |
+
sequence_length = 256
|
13 |
+
batch_size = 1
|
14 |
+
|
15 |
+
transformer_layer.TransformerLayer:
|
16 |
+
window_length = 128
|
17 |
+
use_long_xl_architecture = True
|
18 |
+
|
19 |
+
training_loop.Trainer:
|
20 |
+
num_steps = 1000
|
21 |
+
warmup_steps = 100
|
22 |
+
log_every_steps = 10
|
23 |
+
test_every_steps = 10
|
24 |
+
num_test_steps = 1
|
25 |
+
generate_every_steps = 100
|
26 |
+
print_input_every_steps = 100
|
27 |
+
checkpoint_every_steps = 100
|