Commit bd6c9b7 (verified) by danielhanchen · 1 Parent(s): 20cfd0e

Add files using upload-large-folder tool
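For reference, a minimal sketch of how a commit like this is produced with huggingface_hub's upload-large-folder tool; the repo id and local folder path below are assumptions for illustration, not taken from this commit:

```python
from huggingface_hub import HfApi

# Sketch of the upload path named in the commit message.
# Repo id and folder path are assumed for illustration.
api = HfApi()
api.upload_large_folder(
    repo_id="unsloth/Mistral-Nemo-Base-2407-bnb-4bit",  # assumed repo id
    folder_path="Mistral-Nemo-Base-2407-bnb-4bit",      # assumed local folder
    repo_type="model",
)
```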

.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tekken.json filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "unsloth/Mistral-Nemo-Base-2407",
+  "_name_or_path": "mistralai/Mistral-Nemo-Base-2407",
   "architectures": [
     "MistralForCausalLM"
   ],
@@ -11,7 +11,7 @@
   "hidden_size": 5120,
   "initializer_range": 0.02,
   "intermediate_size": 14336,
-  "max_position_embeddings": 1024000,
+  "max_position_embeddings": 131072,
   "model_type": "mistral",
   "num_attention_heads": 32,
   "num_hidden_layers": 40,
@@ -26,7 +26,12 @@
   "bnb_4bit_use_double_quant": true,
   "llm_int8_enable_fp32_cpu_offload": false,
   "llm_int8_has_fp16_weight": false,
-  "llm_int8_skip_modules": null,
+  "llm_int8_skip_modules": [
+    "lm_head",
+    "multi_modal_projector",
+    "merger",
+    "modality_projection"
+  ],
   "llm_int8_threshold": 6.0,
   "load_in_4bit": true,
   "load_in_8bit": false,
@@ -37,8 +42,8 @@
   "sliding_window": null,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.44.2",
-  "unsloth_version": "2024.9",
+  "transformers_version": "4.49.0.dev0",
+  "unsloth_fixed": true,
   "use_cache": true,
   "vocab_size": 131072
 }
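The quantization_config block above maps onto a bitsandbytes setup in transformers. A minimal sketch of loading with the same fields, assuming a repo id this diff does not state:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirrors the quantization fields in the config.json diff above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    llm_int8_skip_modules=[
        "lm_head",
        "multi_modal_projector",
        "merger",
        "modality_projection",
    ],
)

model = AutoModelForCausalLM.from_pretrained(
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit",  # assumed repo id
    quantization_config=bnb_config,
    device_map="auto",
)
```

Listing the modules in llm_int8_skip_modules keeps them out of quantization; lm_head is the one that actually exists in MistralForCausalLM, while the projector names cover multimodal architectures generically.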
generation_config.json CHANGED
@@ -2,7 +2,7 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "max_length": 1024000,
+  "max_length": 131072,
   "pad_token_id": 10,
-  "transformers_version": "4.44.2"
+  "transformers_version": "4.49.0.dev0"
 }
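The max_length change tracks the corrected max_position_embeddings (131072, i.e. a 128K context window). A quick check, again assuming the repo id:

```python
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained(
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit"  # assumed repo id
)
print(gen_config.max_length)    # 131072
print(gen_config.pad_token_id)  # 10
```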
special_tokens_map.json CHANGED
@@ -13,7 +13,13 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<pad>",
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
   "unk_token": {
     "content": "<unk>",
     "lstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -8007,7 +8007,8 @@
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
-  "model_max_length": 1000000000000000019884624838656,
+  "extra_special_tokens": {},
+  "model_max_length": 131072,
   "pad_token": "<pad>",
   "padding_side": "left",
   "tokenizer_class": "PreTrainedTokenizerFast",