mhm-8x7B-FrankenMoE-v1.0 - exl2 6.0
Browse files- README.md +66 -0
- config.json +41 -0
- huggingface-metadata.txt +15 -0
- model.safetensors.index.json +1 -0
- output-00001-of-00005.safetensors +3 -0
- output-00002-of-00005.safetensors +3 -0
- output-00003-of-00005.safetensors +3 -0
- output-00004-of-00005.safetensors +3 -0
- output-00005-of-00005.safetensors +3 -0
- special_tokens_map.json +29 -0
- tokenizer.json +0 -0
- tokenizer.model +3 -0
- tokenizer_config.json +48 -0
README.md
CHANGED
@@ -1,3 +1,69 @@
|
|
1 |
---
|
2 |
license: apache-2.0
|
|
|
|
|
|
|
|
|
|
|
3 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
license: apache-2.0
|
3 |
+
language:
|
4 |
+
- en
|
5 |
+
tags:
|
6 |
+
- merge
|
7 |
+
- moe
|
8 |
---
|
9 |
+
|
10 |
+

|
11 |
+
|
12 |
+
## Recipe for a Beautiful Frankenstein
|
13 |
+
|
14 |
+
In the laboratory of the mind, where thoughts entwine,
|
15 |
+
MHM and MOE, a potion for a unique design.
|
16 |
+
With stitches of curiosity and bolts of creativity,
|
17 |
+
8 times 7, the magic number, a poetic proclivity.
|
18 |
+
|
19 |
+
### Ingredients:
|
20 |
+
|
21 |
+
- **MHM:** A dash of mystery, a sprinkle of hum,
|
22 |
+
Blend with a melody, let the heartstrings strum.
|
23 |
+
Murmurs in the shadows, whispers in the light,
|
24 |
+
Stir the concoction gently, make the emotions ignite.
|
25 |
+
|
26 |
+
- **MOE:** Essence of the moment, like dew on a rose,
|
27 |
+
Capture the now, before time swiftly goes.
|
28 |
+
Colors of experience, a palette so divine,
|
29 |
+
Mix with MHM, let the fusion entwine.
|
30 |
+
|
31 |
+
### Directions:
|
32 |
+
|
33 |
+
1. **Take 8 parts MHM,** elusive and profound,
|
34 |
+
Let it dance in your thoughts, on imagination's ground.
|
35 |
+
Blend it with the echoes, the silent undertones,
|
36 |
+
A symphony of ideas, where inspiration condones.
|
37 |
+
|
38 |
+
2. **Add 7 parts MOE,** the fleeting embrace,
|
39 |
+
Seize the seconds, let them leave a trace.
|
40 |
+
Infuse it with memories, both bitter and sweet,
|
41 |
+
The tapestry of time, where moments and dreams meet.
|
42 |
+
|
43 |
+
3. **Stir the potion with wonder,** a wand of delight,
|
44 |
+
Let the sparks fly, in the dark of the night.
|
45 |
+
Watch as the alchemy unfolds its grand design,
|
46 |
+
MHM and MOE, a beautiful Frankenstein.
|
47 |
+
|
48 |
+
### Conclusion:
|
49 |
+
|
50 |
+
In the laboratory of life, where dreams come alive,
|
51 |
+
MHM and MOE, the recipe to thrive.
|
52 |
+
A creation so poetic, a fusion so divine,
|
53 |
+
8 times 7, a symphony of time.
|
54 |
+
|
55 |
+
As the echoes resonate, and the moments blend,
|
56 |
+
A masterpiece unfolds, where beginnings and ends,
|
57 |
+
MHM and MOE, a concoction so rare,
|
58 |
+
A beautiful Frankenstein, beyond compare.
|
59 |
+
|
60 |
+
---
|
61 |
+
|
62 |
+
MoE model built with:
|
63 |
+
1. https://github.com/cg123/mergekit/tree/mixtral
|
64 |
+
2. Mistral models, latest merges and fine tunes.
|
65 |
+
3. Expert prompts heavily inspired by https://huggingface.co/Kquant03/Eukaryote-8x7B-bf16
|
66 |
+
|
67 |
+
For details, check the model files; they include the config YAML I used to create this model.
|
68 |
+
|
69 |
+
Come back later for more details.
|
config.json
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "mlabonne/Beagle14-7B",
|
3 |
+
"architectures": [
|
4 |
+
"MixtralForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 1,
|
8 |
+
"eos_token_id": 2,
|
9 |
+
"hidden_act": "silu",
|
10 |
+
"hidden_size": 4096,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 14336,
|
13 |
+
"max_position_embeddings": 32768,
|
14 |
+
"model_type": "mixtral",
|
15 |
+
"num_attention_heads": 32,
|
16 |
+
"num_experts_per_tok": 2,
|
17 |
+
"num_hidden_layers": 32,
|
18 |
+
"num_key_value_heads": 8,
|
19 |
+
"num_local_experts": 8,
|
20 |
+
"output_router_logits": false,
|
21 |
+
"rms_norm_eps": 1e-05,
|
22 |
+
"rope_theta": 10000.0,
|
23 |
+
"router_aux_loss_coef": 0.001,
|
24 |
+
"sliding_window": null,
|
25 |
+
"tie_word_embeddings": false,
|
26 |
+
"torch_dtype": "bfloat16",
|
27 |
+
"transformers_version": "4.36.2",
|
28 |
+
"use_cache": false,
|
29 |
+
"vocab_size": 32000,
|
30 |
+
"quantization_config": {
|
31 |
+
"quant_method": "exl2",
|
32 |
+
"version": "0.0.17",
|
33 |
+
"bits": 6.0,
|
34 |
+
"head_bits": 6,
|
35 |
+
"calibration": {
|
36 |
+
"rows": 100,
|
37 |
+
"length": 2048,
|
38 |
+
"dataset": "(default)"
|
39 |
+
}
|
40 |
+
}
|
41 |
+
}
|
huggingface-metadata.txt
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
url: https://huggingface.co/h2m/mhm-8x7B-FrankenMoE-v1.0
|
2 |
+
branch: main
|
3 |
+
download date: 2024-04-06 11:41:33
|
4 |
+
sha256sum:
|
5 |
+
7d9cef00da0fe3f9a0d6133e40e161c4bd87cd745134f2b4a581e5d648b9a905 model-00001-of-00010.safetensors
|
6 |
+
e8c5e517cda583a43abd5fe33012f9c70c0a356560b8836379b57dbddc333ea1 model-00002-of-00010.safetensors
|
7 |
+
f2318ea1e1fe6fe95c20e3fa74f557a5986778ad936d2701334b3c2454d4edf6 model-00003-of-00010.safetensors
|
8 |
+
8f04e6f6dd251a16f6caf1ea87c04778526af45c5c70f41c7d26a4d8989b6e84 model-00004-of-00010.safetensors
|
9 |
+
ddeb07a6ecd130ee527aaa3a52e85a4e278dd513b1f9f5e4949497ceaf763405 model-00005-of-00010.safetensors
|
10 |
+
afab070b4e1b7f080c7aa183b690490f205bab37c723c7d1d573b4b2d4601b2a model-00006-of-00010.safetensors
|
11 |
+
c1eadd619781381f3983d468e11088476a84b42051f58fae99e4066881b9882c model-00007-of-00010.safetensors
|
12 |
+
b0c23f3449e8c03c039f0cfdf7f9fd3e66b737a988a9ec4f6694931351b99bb4 model-00008-of-00010.safetensors
|
13 |
+
7f3f0246252c01bbda4ffc27cc9b450e92818ec8033f0d4618a2094100c8b063 model-00009-of-00010.safetensors
|
14 |
+
f935e1ef6ae576a4707c7deeb4bd8b2e3eca2eb6141b7ede32fc5a59b3ba0117 model-00010-of-00010.safetensors
|
15 |
+
dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 tokenizer.model
|
model.safetensors.index.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"metadata": {"mergekit_version": "0.0.3.2"}, "weight_map": {"model.embed_tokens.weight": "model-00001-of-00010.safetensors", "model.norm.weight": "model-00001-of-00010.safetensors", "lm_head.weight": "model-00001-of-00010.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.2.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.3.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.4.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.5.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.6.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.7.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.8.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.9.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.10.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.11.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.12.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.13.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.14.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.15.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.16.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.17.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.18.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.19.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.20.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.21.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.22.input_layernorm.weight": 
"model-00001-of-00010.safetensors", "model.layers.23.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.24.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.25.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.26.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.27.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.28.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.29.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.30.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.31.input_layernorm.weight": "model-00001-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.5.w3.weight": 
"model-00001-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00010.safetensors", 
"model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.0.w3.weight": 
"model-00001-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00001-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00001-of-00010.safetensors", 
"model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00001-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00001-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.3.w3.weight": 
"model-00002-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00010.safetensors", 
"model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.6.w3.weight": 
"model-00002-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00010.safetensors", 
"model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.1.w3.weight": 
"model-00003-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00010.safetensors", 
"model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.4.w3.weight": 
"model-00003-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00003-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00003-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00003-of-00010.safetensors", 
"model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00004-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00004-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.7.w2.weight": 
"model-00004-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00010.safetensors", 
"model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.2.w2.weight": 
"model-00004-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00004-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00004-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00010.safetensors", 
"model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.5.w2.weight": 
"model-00005-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00010.safetensors", 
"model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.0.w2.weight": 
"model-00005-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00005-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00010.safetensors", 
"model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.3.w2.weight": 
"model-00006-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00010.safetensors", 
"model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.6.w2.weight": 
"model-00006-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00006-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00006-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00010.safetensors", 
"model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00007-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00007-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00010.safetensors", "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00010.safetensors", "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.1.w1.weight": 
"model-00007-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00010.safetensors", "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00010.safetensors", "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00010.safetensors", "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00010.safetensors", 
"model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00010.safetensors", "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00010.safetensors", "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.4.w1.weight": 
"model-00007-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00010.safetensors", "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00007-of-00010.safetensors", "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00010.safetensors", "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00010.safetensors", 
"model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00010.safetensors", "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00010.safetensors", "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00010.safetensors", "model.layers.12.block_sparse_moe.experts.7.w1.weight": 
"model-00008-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00010.safetensors", "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00010.safetensors", "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00010.safetensors", 
"model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00010.safetensors", "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00010.safetensors", "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00010.safetensors", "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.2.w1.weight": 
"model-00008-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00010.safetensors", "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00008-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00008-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00010.safetensors", "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00010.safetensors", "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00010.safetensors", 
"model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00010.safetensors", "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00010.safetensors", "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.5.w1.weight": 
"model-00009-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00010.safetensors", "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00010.safetensors", "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00010.safetensors", "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00010.safetensors", 
"model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00010.safetensors", "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00010.safetensors", "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00010.safetensors", "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.0.w1.weight": 
"model-00009-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00010.safetensors", "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00009-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00009-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00009-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00010.safetensors", "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00010-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00010.safetensors", 
"model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00010-of-00010.safetensors", "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00010.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.16.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.19.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.20.post_attention_layernorm.weight": 
"model-00010-of-00010.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.28.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.29.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.30.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.31.post_attention_layernorm.weight": "model-00010-of-00010.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.12.self_attn.q_proj.weight": 
"model-00010-of-00010.safetensors", "model.layers.13.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.14.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.15.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.16.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.17.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.18.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.19.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.20.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.21.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.22.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.23.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.24.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.25.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.26.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.27.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.28.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.29.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.30.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.31.self_attn.q_proj.weight": "model-00010-of-00010.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.5.self_attn.k_proj.weight": 
"model-00010-of-00010.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.13.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.16.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.17.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.18.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.19.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.20.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.21.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.22.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.23.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.24.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.25.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.26.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.27.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.28.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.29.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.30.self_attn.k_proj.weight": 
"model-00010-of-00010.safetensors", "model.layers.31.self_attn.k_proj.weight": "model-00010-of-00010.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.14.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.15.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.16.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.17.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.18.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.19.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.20.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.21.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.22.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.23.self_attn.v_proj.weight": 
"model-00010-of-00010.safetensors", "model.layers.24.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.25.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.26.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.27.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.28.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.29.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.30.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.31.self_attn.v_proj.weight": "model-00010-of-00010.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.12.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.13.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.14.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.15.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.16.self_attn.o_proj.weight": 
"model-00010-of-00010.safetensors", "model.layers.17.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.18.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.19.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.20.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.21.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.22.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.23.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.24.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.25.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.26.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.27.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.28.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.29.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.30.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.31.self_attn.o_proj.weight": "model-00010-of-00010.safetensors", "model.layers.0.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.1.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.2.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.3.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.4.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.5.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.6.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.7.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.8.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", 
"model.layers.9.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.10.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.11.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.12.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.13.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.14.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.15.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.16.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.17.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.18.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.19.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.20.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.21.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.22.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.23.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.24.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.25.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.26.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.27.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.28.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.29.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.30.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors", "model.layers.31.block_sparse_moe.gate.weight": "model-00010-of-00010.safetensors"}}
|
output-00001-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e7e5156974c284b51ac69cb56a959d39320b275c3907cd2dfb632ed8d8c58a3a
|
3 |
+
size 8589723064
|
output-00002-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7aaa9f23535fa94f7ba061e0317c54f2aec61b6ea9f190f5d9b262c96644e0f5
|
3 |
+
size 8589414128
|
output-00003-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc58d704cbe73d6caef0a0b6f769f111e2b1ebca732cc69828065b56d8e63aa6
|
3 |
+
size 8574414816
|
output-00004-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d46eb6aef112ea34fc6b2506120bc6ca3e19a76ad5382c4eed2be97a8e8d7f3
|
3 |
+
size 8572052488
|
output-00005-of-00005.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:393bfc8c4f63f4968f26a23ca6db01255282f976ad32092c79a03bcf63f00a86
|
3 |
+
size 867326312
|
special_tokens_map.json
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"<unk>",
|
4 |
+
"<s>",
|
5 |
+
"</s>"
|
6 |
+
],
|
7 |
+
"bos_token": {
|
8 |
+
"content": "<s>",
|
9 |
+
"lstrip": false,
|
10 |
+
"normalized": false,
|
11 |
+
"rstrip": false,
|
12 |
+
"single_word": false
|
13 |
+
},
|
14 |
+
"eos_token": {
|
15 |
+
"content": "</s>",
|
16 |
+
"lstrip": false,
|
17 |
+
"normalized": false,
|
18 |
+
"rstrip": false,
|
19 |
+
"single_word": false
|
20 |
+
},
|
21 |
+
"pad_token": "<s>",
|
22 |
+
"unk_token": {
|
23 |
+
"content": "<unk>",
|
24 |
+
"lstrip": false,
|
25 |
+
"normalized": false,
|
26 |
+
"rstrip": false,
|
27 |
+
"single_word": false
|
28 |
+
}
|
29 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
|
3 |
+
size 493443
|
tokenizer_config.json
ADDED
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_bos_token": true,
|
3 |
+
"add_eos_token": false,
|
4 |
+
"added_tokens_decoder": {
|
5 |
+
"0": {
|
6 |
+
"content": "<unk>",
|
7 |
+
"lstrip": false,
|
8 |
+
"normalized": false,
|
9 |
+
"rstrip": false,
|
10 |
+
"single_word": false,
|
11 |
+
"special": true
|
12 |
+
},
|
13 |
+
"1": {
|
14 |
+
"content": "<s>",
|
15 |
+
"lstrip": false,
|
16 |
+
"normalized": false,
|
17 |
+
"rstrip": false,
|
18 |
+
"single_word": false,
|
19 |
+
"special": true
|
20 |
+
},
|
21 |
+
"2": {
|
22 |
+
"content": "</s>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false,
|
27 |
+
"special": true
|
28 |
+
}
|
29 |
+
},
|
30 |
+
"additional_special_tokens": [
|
31 |
+
"<unk>",
|
32 |
+
"<s>",
|
33 |
+
"</s>"
|
34 |
+
],
|
35 |
+
"bos_token": "<s>",
|
36 |
+
"clean_up_tokenization_spaces": false,
|
37 |
+
"eos_token": "</s>",
|
38 |
+
"legacy": true,
|
39 |
+
"model_max_length": 1000000000000000019884624838656,
|
40 |
+
"pad_token": "<s>",
|
41 |
+
"padding_side": "left",
|
42 |
+
"sp_model_kwargs": {},
|
43 |
+
"spaces_between_special_tokens": false,
|
44 |
+
"split_special_tokens": false,
|
45 |
+
"tokenizer_class": "LlamaTokenizer",
|
46 |
+
"unk_token": "<unk>",
|
47 |
+
"use_default_system_prompt": true
|
48 |
+
}
|