FrankC0st1e committed on
Commit
8fa0de6
·
1 Parent(s): 91a5e0a

change name to minicpm3

Browse files
Files changed (3) hide show
  1. config.json +7 -7
  2. configuration_minicpm.py +1 -1
  3. modeling_minicpm.py +4 -4
config.json CHANGED
@@ -4,14 +4,14 @@
4
  "MiniCPM3ForCausalLM"
5
  ],
6
  "auto_map": {
7
- "AutoConfig": "configuration_minicpm.MiniCPMConfig",
8
- "AutoModel": "modeling_minicpm.MiniCPMModel",
9
- "AutoModelForCausalLM": "modeling_minicpm.MiniCPMForCausalLM",
10
- "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPMForCausalLM",
11
- "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPMForSequenceClassification"
12
  },
13
  "bos_token_id": 1,
14
- "eos_token_id": 2,
15
  "hidden_act": "silu",
16
  "initializer_range": 0.1,
17
  "hidden_size": 2560,
@@ -32,7 +32,7 @@
32
  "original_max_position_embeddings": 32768
33
  },
34
  "torch_dtype": "bfloat16",
35
- "transformers_version": "4.36.0",
36
  "use_cache": true,
37
  "vocab_size": 73448,
38
  "scale_emb": 12,
 
4
  "MiniCPM3ForCausalLM"
5
  ],
6
  "auto_map": {
7
+ "AutoConfig": "configuration_minicpm.MiniCPM3Config",
8
+ "AutoModel": "modeling_minicpm.MiniCPM3Model",
9
+ "AutoModelForCausalLM": "modeling_minicpm.MiniCPM3ForCausalLM",
10
+ "AutoModelForSeq2SeqLM": "modeling_minicpm.MiniCPM3ForCausalLM",
11
+ "AutoModelForSequenceClassification": "modeling_minicpm.MiniCPM3ForSequenceClassification"
12
  },
13
  "bos_token_id": 1,
14
+ "eos_token_id": [2, 73440],
15
  "hidden_act": "silu",
16
  "initializer_range": 0.1,
17
  "hidden_size": 2560,
 
32
  "original_max_position_embeddings": 32768
33
  },
34
  "torch_dtype": "bfloat16",
35
+ "transformers_version": "4.41.0",
36
  "use_cache": true,
37
  "vocab_size": 73448,
38
  "scale_emb": 12,
configuration_minicpm.py CHANGED
@@ -28,7 +28,7 @@ logger = logging.get_logger(__name__)
28
  MINICPM_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
29
 
30
 
31
- class MiniCPMConfig(PretrainedConfig):
32
  r"""
33
  This is the configuration class to store the configuration of a [`MiniCPMModel`]. It is used to instantiate an MiniCPM
34
  model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
 
28
  MINICPM_PRETRAINED_CONFIG_ARCHIVE_MAP = {}
29
 
30
 
31
+ class MiniCPM3Config(PretrainedConfig):
32
  r"""
33
  This is the configuration class to store the configuration of a [`MiniCPMModel`]. It is used to instantiate an MiniCPM
34
  model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
modeling_minicpm.py CHANGED
@@ -979,7 +979,7 @@ MINICPM_START_DOCSTRING = r"""
979
  "The bare MiniCPM Model outputting raw hidden-states without any specific head on top.",
980
  MINICPM_START_DOCSTRING,
981
  )
982
- class MiniCPMPreTrainedModel(PreTrainedModel):
983
  config_class = MiniCPMConfig
984
  base_model_prefix = "model"
985
  supports_gradient_checkpointing = True
@@ -1075,7 +1075,7 @@ MINICPM_INPUTS_DOCSTRING = r"""
1075
  "The bare MiniCPM Model outputting raw hidden-states without any specific head on top.",
1076
  MINICPM_START_DOCSTRING,
1077
  )
1078
- class MiniCPMModel(MiniCPMPreTrainedModel):
1079
  """
1080
  Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`MiniCPMDecoderLayer`]
1081
 
@@ -1239,7 +1239,7 @@ class MiniCPMModel(MiniCPMPreTrainedModel):
1239
  )
1240
 
1241
 
1242
- class MiniCPMForCausalLM(MiniCPMPreTrainedModel):
1243
  _tied_weights_keys = ["lm_head.weight"]
1244
 
1245
  def __init__(self, config):
@@ -1465,7 +1465,7 @@ class MiniCPMForCausalLM(MiniCPMPreTrainedModel):
1465
  """,
1466
  MINICPM_START_DOCSTRING,
1467
  )
1468
- class MiniCPMForSequenceClassification(MiniCPMPreTrainedModel):
1469
  def __init__(self, config):
1470
  super().__init__(config)
1471
  self.num_labels = config.num_labels
 
979
  "The bare MiniCPM Model outputting raw hidden-states without any specific head on top.",
980
  MINICPM_START_DOCSTRING,
981
  )
982
+ class MiniCPM3PreTrainedModel(PreTrainedModel):
983
  config_class = MiniCPMConfig
984
  base_model_prefix = "model"
985
  supports_gradient_checkpointing = True
 
1075
  "The bare MiniCPM Model outputting raw hidden-states without any specific head on top.",
1076
  MINICPM_START_DOCSTRING,
1077
  )
1078
+ class MiniCPM3Model(MiniCPM3PreTrainedModel):
1079
  """
1080
  Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`MiniCPMDecoderLayer`]
1081
 
 
1239
  )
1240
 
1241
 
1242
+ class MiniCPM3ForCausalLM(MiniCPM3PreTrainedModel):
1243
  _tied_weights_keys = ["lm_head.weight"]
1244
 
1245
  def __init__(self, config):
 
1465
  """,
1466
  MINICPM_START_DOCSTRING,
1467
  )
1468
+ class MiniCPM3ForSequenceClassification(MiniCPM3PreTrainedModel):
1469
  def __init__(self, config):
1470
  super().__init__(config)
1471
  self.num_labels = config.num_labels