xhyi committed on
Commit
f775cfa
·
1 Parent(s): eb1ad5b

Upload configuration_codegen.py

Browse files
Files changed (1) hide show
  1. configuration_codegen.py +87 -0
configuration_codegen.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2021 The EleutherAI and HuggingFace Teams. All rights reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ # Modified configuration implementation based on https://github.com/huggingface/transformers/blob/main/src/transformers/models/gptj/configuration_gptj.py
17
+
18
+ from transformers.configuration_utils import PretrainedConfig
19
+ from transformers.utils import logging
20
+
21
+ logger = logging.get_logger(__name__)
22
+
23
+
24
class CodeGenConfig(PretrainedConfig):
    """Configuration container for a CodeGen model.

    Every constructor argument is stored on the instance under its own name.
    Four read-only properties expose the ``n_*`` fields under alternative
    names:

    * ``max_position_embeddings`` -> ``n_positions``
    * ``hidden_size``             -> ``n_embd``
    * ``num_attention_heads``     -> ``n_head``
    * ``num_hidden_layers``       -> ``n_layer``

    NOTE(review): the meaning of each hyperparameter is defined by the model
    code that consumes this config, which is not part of this file; the
    descriptions below are inferred from the names and should be confirmed
    against that code.

    Args:
        vocab_size: Stored as ``vocab_size`` (default 50400); presumably the
            size of the token vocabulary.
        n_positions: Stored as ``n_positions`` (default 2048); also readable
            as ``max_position_embeddings``.
        n_ctx: Stored as ``n_ctx`` (default 2048).
        n_embd: Stored as ``n_embd`` (default 4096); also readable as
            ``hidden_size``.
        n_layer: Stored as ``n_layer`` (default 28); also readable as
            ``num_hidden_layers``.
        n_head: Stored as ``n_head`` (default 16); also readable as
            ``num_attention_heads``.
        rotary_dim: Stored as ``rotary_dim`` (default 64).
        n_inner: Stored as ``n_inner`` (default ``None``).
        activation_function: Stored as ``activation_function``
            (default ``"gelu_new"``).
        resid_pdrop: Stored as ``resid_pdrop`` (default 0.0); presumably a
            dropout probability.
        embd_pdrop: Stored as ``embd_pdrop`` (default 0.0); presumably a
            dropout probability.
        attn_pdrop: Stored as ``attn_pdrop`` (default 0.0); presumably a
            dropout probability.
        layer_norm_epsilon: Stored as ``layer_norm_epsilon`` (default 1e-5).
        initializer_range: Stored as ``initializer_range`` (default 0.02).
        scale_attn_weights: Stored as ``scale_attn_weights`` (default True).
        gradient_checkpointing: Stored as ``gradient_checkpointing``
            (default False).
        use_cache: Stored as ``use_cache`` (default True).
        bos_token_id: Forwarded to ``PretrainedConfig.__init__`` and also
            stored as ``bos_token_id`` (default 50256).
        eos_token_id: Forwarded to ``PretrainedConfig.__init__`` and also
            stored as ``eos_token_id`` (default 50256).
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    # Identifier for this configuration family (presumably what the
    # transformers auto-classes key on -- confirm against the library).
    model_type = "codegen"

    def __init__(
        self,
        vocab_size=50400,
        n_positions=2048,
        n_ctx=2048,
        n_embd=4096,
        n_layer=28,
        n_head=16,
        rotary_dim=64,
        n_inner=None,
        activation_function="gelu_new",
        resid_pdrop=0.0,
        embd_pdrop=0.0,
        attn_pdrop=0.0,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        scale_attn_weights=True,
        gradient_checkpointing=False,
        use_cache=True,
        bos_token_id=50256,
        eos_token_id=50256,
        **kwargs
    ):
        """Initialise the base config, then record every hyperparameter."""
        # The base class gets the special-token ids plus any extra keywords.
        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            **kwargs
        )

        # (attribute name, value) pairs, applied strictly in this order so
        # the instance ``__dict__`` is populated in a fixed sequence.
        settings = (
            ("vocab_size", vocab_size),
            ("n_ctx", n_ctx),
            ("n_positions", n_positions),
            ("n_embd", n_embd),
            ("n_layer", n_layer),
            ("n_head", n_head),
            ("n_inner", n_inner),
            ("rotary_dim", rotary_dim),
            ("activation_function", activation_function),
            ("resid_pdrop", resid_pdrop),
            ("embd_pdrop", embd_pdrop),
            ("attn_pdrop", attn_pdrop),
            ("layer_norm_epsilon", layer_norm_epsilon),
            ("initializer_range", initializer_range),
            ("gradient_checkpointing", gradient_checkpointing),
            ("scale_attn_weights", scale_attn_weights),
            ("use_cache", use_cache),
            # The token ids were already handed to the base class above;
            # they are also set directly on the instance here.
            ("bos_token_id", bos_token_id),
            ("eos_token_id", eos_token_id),
        )
        for field_name, field_value in settings:
            setattr(self, field_name, field_value)

    @property
    def max_position_embeddings(self):
        """Read-only alias for ``n_positions``."""
        return self.n_positions

    @property
    def hidden_size(self):
        """Read-only alias for ``n_embd``."""
        return self.n_embd

    @property
    def num_attention_heads(self):
        """Read-only alias for ``n_head``."""
        return self.n_head

    @property
    def num_hidden_layers(self):
        """Read-only alias for ``n_layer``."""
        return self.n_layer