Crystalcareai
committed on
Update configuration_quiet.py
Browse files- configuration_quiet.py +19 -22
configuration_quiet.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
# coding=utf-8
|
2 |
-
# Copyright 2023
|
3 |
#
|
4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
# you may not use this file except in compliance with the License.
|
@@ -12,26 +12,23 @@
|
|
12 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
-
"""
|
16 |
|
17 |
-
from
|
18 |
-
from
|
19 |
|
20 |
|
21 |
logger = logging.get_logger(__name__)
|
22 |
|
23 |
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
class MistralConfig(PretrainedConfig):
|
28 |
r"""
|
29 |
-
This is the configuration class to store the configuration of a [`
|
30 |
-
|
31 |
-
with the defaults will yield a similar configuration to that of the
|
32 |
|
33 |
-
[
|
34 |
-
[
|
35 |
|
36 |
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
|
37 |
documentation from [`PretrainedConfig`] for more information.
|
@@ -39,8 +36,8 @@ class MistralConfig(PretrainedConfig):
|
|
39 |
|
40 |
Args:
|
41 |
vocab_size (`int`, *optional*, defaults to 32000):
|
42 |
-
Vocabulary size of the
|
43 |
-
`inputs_ids` passed when calling [`
|
44 |
hidden_size (`int`, *optional*, defaults to 4096):
|
45 |
Dimension of the hidden representations.
|
46 |
intermediate_size (`int`, *optional*, defaults to 14336):
|
@@ -59,7 +56,7 @@ class MistralConfig(PretrainedConfig):
|
|
59 |
hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
|
60 |
The non-linear activation function (function or string) in the decoder.
|
61 |
max_position_embeddings (`int`, *optional*, defaults to `4096*32`):
|
62 |
-
The maximum sequence length that this model might ever be used with.
|
63 |
allows sequences of up to 4096*32 tokens.
|
64 |
initializer_range (`float`, *optional*, defaults to 0.02):
|
65 |
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
@@ -84,19 +81,19 @@ class MistralConfig(PretrainedConfig):
|
|
84 |
The dropout ratio for the attention probabilities.
|
85 |
|
86 |
```python
|
87 |
-
>>> from transformers import
|
88 |
|
89 |
-
>>> # Initializing a
|
90 |
-
>>> configuration =
|
91 |
|
92 |
-
>>> # Initializing a model from the
|
93 |
-
>>> model =
|
94 |
|
95 |
>>> # Accessing the model configuration
|
96 |
>>> configuration = model.config
|
97 |
```"""
|
98 |
|
99 |
-
model_type = "
|
100 |
keys_to_ignore_at_inference = ["past_key_values"]
|
101 |
|
102 |
def __init__(
|
|
|
1 |
# coding=utf-8
|
2 |
+
# Copyright 2023 Quiet AI and the HuggingFace Inc. team. All rights reserved.
|
3 |
#
|
4 |
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
# you may not use this file except in compliance with the License.
|
|
|
12 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
# See the License for the specific language governing permissions and
|
14 |
# limitations under the License.
|
15 |
+
""" Quiet model configuration"""
|
16 |
|
17 |
+
from transformers.configuration_utils import PretrainedConfig
|
18 |
+
from transformers.utils import logging
|
19 |
|
20 |
|
21 |
logger = logging.get_logger(__name__)
|
22 |
|
23 |
|
24 |
+
class QuietConfig(PretrainedConfig):
|
|
|
|
|
|
|
25 |
r"""
|
26 |
+
This is the configuration class to store the configuration of a [`QuietModel`]. It is used to instantiate a
|
27 |
+
Quiet model according to the specified arguments, defining the model architecture. Instantiating a configuration
|
28 |
+
with the defaults will yield a similar configuration to that of the Quiet-7B-v0.1 or Quiet-7B-Instruct-v0.1.
|
29 |
|
30 |
+
[quietai/Quiet-7B-v0.1](https://huggingface.co/quietai/Quiet-7B-v0.1)
|
31 |
+
[quietai/Quiet-7B-Instruct-v0.1](https://huggingface.co/quietai/Quiet-7B-Instruct-v0.1)
|
32 |
|
33 |
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
|
34 |
documentation from [`PretrainedConfig`] for more information.
|
|
|
36 |
|
37 |
Args:
|
38 |
vocab_size (`int`, *optional*, defaults to 32000):
|
39 |
+
Vocabulary size of the Quiet model. Defines the number of different tokens that can be represented by the
|
40 |
+
`input_ids` passed when calling [`QuietModel`]
|
41 |
hidden_size (`int`, *optional*, defaults to 4096):
|
42 |
Dimension of the hidden representations.
|
43 |
intermediate_size (`int`, *optional*, defaults to 14336):
|
|
|
56 |
hidden_act (`str` or `function`, *optional*, defaults to `"silu"`):
|
57 |
The non-linear activation function (function or string) in the decoder.
|
58 |
max_position_embeddings (`int`, *optional*, defaults to `4096*32`):
|
59 |
+
The maximum sequence length that this model might ever be used with. Quiet's sliding window attention
|
60 |
allows sequences of up to 4096*32 tokens.
|
61 |
initializer_range (`float`, *optional*, defaults to 0.02):
|
62 |
The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
|
|
|
81 |
The dropout ratio for the attention probabilities.
|
82 |
|
83 |
```python
|
84 |
+
>>> from transformers import QuietModel, QuietConfig
|
85 |
|
86 |
+
>>> # Initializing a Quiet 7B style configuration
|
87 |
+
>>> configuration = QuietConfig()
|
88 |
|
89 |
+
>>> # Initializing a model from the Quiet 7B style configuration
|
90 |
+
>>> model = QuietModel(configuration)
|
91 |
|
92 |
>>> # Accessing the model configuration
|
93 |
>>> configuration = model.config
|
94 |
```"""
|
95 |
|
96 |
+
model_type = "quiet"
|
97 |
keys_to_ignore_at_inference = ["past_key_values"]
|
98 |
|
99 |
def __init__(
|