diff --git a/llama-hf/13B/config.json b/llama-hf/13B/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..14d449ecdf6bd0ddd31d26cb06ab45504c375f0e
--- /dev/null
+++ b/llama-hf/13B/config.json
@@ -0,0 +1,22 @@
+{
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "intermediate_size": 13824,
+  "max_position_embeddings": 2048,
+  "model_type": "llama",
+  "num_attention_heads": 40,
+  "num_hidden_layers": 40,
+  "pad_token_id": 0,
+  "rms_norm_eps": 1e-06,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.28.0.dev0",
+  "use_cache": true,
+  "vocab_size": 32000
+}
diff --git a/llama-hf/13B/generation_config.json b/llama-hf/13B/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..55d7b5b6db760f8c1963be3d56a3bc363bacdfb1
--- /dev/null
+++ b/llama-hf/13B/generation_config.json
@@ -0,0 +1,7 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "pad_token_id": 0,
+  "transformers_version": "4.28.0.dev0"
+}
diff --git a/llama-hf/13B/model-00001-of-00003.safetensors b/llama-hf/13B/model-00001-of-00003.safetensors
new file mode 120000
index 0000000000000000000000000000000000000000..39e9253f04cd19151b694f0a44ea103b95b20584
--- /dev/null
+++ b/llama-hf/13B/model-00001-of-00003.safetensors
@@ -0,0 +1 @@
+../../.git/annex/objects/QG/MG/SHA256E-s9948693278--d062759121ccb01dfa6490927d200d93beff422b848208a06a57c61becaa8b64/SHA256E-s9948693278--d062759121ccb01dfa6490927d200d93beff422b848208a06a57c61becaa8b64
\ No newline at end of file
diff --git a/llama-hf/13B/model-00002-of-00003.safetensors b/llama-hf/13B/model-00002-of-00003.safetensors
new file mode 120000
index 0000000000000000000000000000000000000000..301e70f7acdcc71bbda0d86a3b98b3d551130f28
--- /dev/null
+++ b/llama-hf/13B/model-00002-of-00003.safetensors
@@ -0,0 +1 @@
+../../.git/annex/objects/zf/q4/SHA256E-s9904129374--9e642ef8943eb3262dc6bea972573cddf4455d02e432b04f82d056e7b98d570a/SHA256E-s9904129374--9e642ef8943eb3262dc6bea972573cddf4455d02e432b04f82d056e7b98d570a
\ No newline at end of file
diff --git a/llama-hf/13B/model-00003-of-00003.safetensors b/llama-hf/13B/model-00003-of-00003.safetensors
new file mode 120000
index 0000000000000000000000000000000000000000..4228b93943032644492c7a15803d6dbf8debe6c3
--- /dev/null
+++ b/llama-hf/13B/model-00003-of-00003.safetensors
@@ -0,0 +1 @@
+../../.git/annex/objects/Gk/gW/SHA256E-s6178962272--ff4de777591c2515623190b88f1dcf80fb7539e3cb06fbc49592890b5c132ba1/SHA256E-s6178962272--ff4de777591c2515623190b88f1dcf80fb7539e3cb06fbc49592890b5c132ba1
\ No newline at end of file
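Note: the three .safetensors shards above are not committed as regular files but as git-annex symlinks into .git/annex/objects. Each SHA256E key embeds the shard's size in bytes (the -s... infix); the three sizes sum to roughly 26 GB, consistent with the total_size recorded in the indexes below plus a small per-shard safetensors header overhead. In a git-annex (or DataLad) checkout the shard content has to be fetched before the weights can be read, for example with: git annex get llama-hf/13B (or, in a DataLad dataset, datalad get llama-hf/13B).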
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.11.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.12.self_attn.q_proj.weight": 
"model-00001-of-00003.safetensors", + "model.layers.12.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.15.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00003.safetensors", + 
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.20.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.post_attention_layernorm.weight": 
"model-00002-of-00003.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + 
"model.layers.24.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.25.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.27.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.input_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.29.mlp.down_proj.weight": 
"model-00002-of-00003.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.30.self_attn.rotary_emb.inv_freq": "model-00002-of-00003.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00003.safetensors", + "model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + 
"model.layers.32.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.32.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.36.self_attn.rotary_emb.inv_freq": 
"model-00003-of-00003.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.input_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.rotary_emb.inv_freq": "model-00003-of-00003.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00003-of-00003.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.mlp.gate_proj.weight": 
"model-00001-of-00003.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.o_proj.weight": 
"model-00001-of-00003.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.rotary_emb.inv_freq": "model-00001-of-00003.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00003.safetensors", + "model.norm.weight": "model-00003-of-00003.safetensors" + } +} \ No newline at end of file diff --git a/llama-hf/13B/pytorch_model.bin.index.json b/llama-hf/13B/pytorch_model.bin.index.json new file mode 100644 index 0000000000000000000000000000000000000000..dd9a5597b14a925bbe5a278e71f5cffd1ad35096 --- /dev/null +++ b/llama-hf/13B/pytorch_model.bin.index.json @@ -0,0 +1,410 @@ +{ + "metadata": { + "total_size": 26031738880 + }, + "weight_map": { + "lm_head.weight": "pytorch_model-00003-of-00003.bin", + "model.embed_tokens.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.mlp.up_proj.weight": 
"pytorch_model-00001-of-00003.bin", + "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.15.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + 
"model.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.2.input_layernorm.weight": 
"pytorch_model-00001-of-00003.bin", + "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.20.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + 
"model.layers.23.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.27.self_attn.q_proj.weight": 
"pytorch_model-00002-of-00003.bin", + "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.input_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.mlp.up_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.30.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.mlp.down_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.30.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00003.bin", + "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00002-of-00003.bin", + "model.layers.31.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + 
"model.layers.31.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.32.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.32.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.33.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.33.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.34.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.34.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.post_attention_layernorm.weight": 
"pytorch_model-00003-of-00003.bin", + "model.layers.35.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.35.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.35.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.36.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.36.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.37.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.37.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.38.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.38.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.input_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.mlp.down_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.mlp.gate_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.mlp.up_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.post_attention_layernorm.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.self_attn.k_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.self_attn.o_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.39.self_attn.q_proj.weight": "pytorch_model-00003-of-00003.bin", + 
"model.layers.39.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00003.bin", + "model.layers.39.self_attn.v_proj.weight": "pytorch_model-00003-of-00003.bin", + "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + 
"model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00003.bin", + "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00003.bin", + "model.norm.weight": "pytorch_model-00003-of-00003.bin" + } +} diff --git a/llama-hf/13B/special_tokens_map.json b/llama-hf/13B/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/llama-hf/13B/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/llama-hf/13B/tokenizer.model b/llama-hf/13B/tokenizer.model new file mode 120000 index 0000000000000000000000000000000000000000..16479c4d5ef98c12b179e1557bb3d4039757d83b --- /dev/null +++ b/llama-hf/13B/tokenizer.model @@ -0,0 +1 @@ +../../.git/annex/objects/m9/qj/SHA256E-s499723--9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347/SHA256E-s499723--9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 \ No newline at end of file diff --git a/llama-hf/13B/tokenizer_config.json b/llama-hf/13B/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a54b01aa3699f19e1aea416fc337f910f60c6839 --- /dev/null +++ b/llama-hf/13B/tokenizer_config.json @@ -0,0 +1 @@ +{"bos_token": "", "eos_token": "", "model_max_length": 1000000000000000019884624838656, "tokenizer_class": "LlamaTokenizer", "unk_token": ""} \ No newline at end of file diff --git a/llama-hf/30B/config.json b/llama-hf/30B/config.json new file mode 100644 index 0000000000000000000000000000000000000000..646bdc5c055d33d735131148087fa1b7c74442bb --- /dev/null +++ b/llama-hf/30B/config.json @@ -0,0 +1,22 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 6656, + "initializer_range": 0.02, + "intermediate_size": 17920, + "max_position_embeddings": 2048, + "model_type": "llama", + "num_attention_heads": 52, + "num_hidden_layers": 60, + "pad_token_id": 0, + "rms_norm_eps": 1e-06, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.28.0.dev0", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/llama-hf/30B/generation_config.json b/llama-hf/30B/generation_config.json new 
file mode 100644 index 0000000000000000000000000000000000000000..55d7b5b6db760f8c1963be3d56a3bc363bacdfb1 --- /dev/null +++ b/llama-hf/30B/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "pad_token_id": 0, + "transformers_version": "4.28.0.dev0" +} diff --git a/llama-hf/30B/model-00001-of-00007.safetensors b/llama-hf/30B/model-00001-of-00007.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..75aed6d031bba8f6c392b6920af09c374c3b4019 --- /dev/null +++ b/llama-hf/30B/model-00001-of-00007.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/3J/j0/SHA256E-s9818304536--b75f80feefbb9b2a75b6acd0f0e50a9297bbb79a10bfb20fe188d103ae287a37/SHA256E-s9818304536--b75f80feefbb9b2a75b6acd0f0e50a9297bbb79a10bfb20fe188d103ae287a37 \ No newline at end of file diff --git a/llama-hf/30B/model-00002-of-00007.safetensors b/llama-hf/30B/model-00002-of-00007.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..4bebf0cf9160d610da656e9b2b245604a30f0259 --- /dev/null +++ b/llama-hf/30B/model-00002-of-00007.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/zZ/2P/SHA256E-s9958081294--a9348c935b7d4cd7db5a9f133dd099eb3ac4af7499f9cbf908b5158d3820c3fa/SHA256E-s9958081294--a9348c935b7d4cd7db5a9f133dd099eb3ac4af7499f9cbf908b5158d3820c3fa \ No newline at end of file diff --git a/llama-hf/30B/model-00003-of-00007.safetensors b/llama-hf/30B/model-00003-of-00007.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..56c0a1f9fbbaefc5ed9b58b44de7c1a8416b9ef2 --- /dev/null +++ b/llama-hf/30B/model-00003-of-00007.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/K9/GV/SHA256E-s9896713232--9ca93995654226ee15f6bfbd95689cd31a37b41dc77c3e24858d20240a8e975f/SHA256E-s9896713232--9ca93995654226ee15f6bfbd95689cd31a37b41dc77c3e24858d20240a8e975f \ No newline at end of file diff --git a/llama-hf/30B/model-00004-of-00007.safetensors b/llama-hf/30B/model-00004-of-00007.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..68b9ee64ce0eee8e477a8174e26de06d163e8234 --- /dev/null +++ b/llama-hf/30B/model-00004-of-00007.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/1Q/fJ/SHA256E-s9869449664--655a66a9964b72a83fde4ad73f864b8e7d6c125babd312989c66bb3be4429704/SHA256E-s9869449664--655a66a9964b72a83fde4ad73f864b8e7d6c125babd312989c66bb3be4429704 \ No newline at end of file diff --git a/llama-hf/30B/model-00005-of-00007.safetensors b/llama-hf/30B/model-00005-of-00007.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..ec1c1867a5fedceeb0a18af5178b8b41be462d79 --- /dev/null +++ b/llama-hf/30B/model-00005-of-00007.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/xm/ww/SHA256E-s9869449664--bcfcc763da8f4fb98772ae12f1ffc721c5f65f2e9c1112cc103095164544a728/SHA256E-s9869449664--bcfcc763da8f4fb98772ae12f1ffc721c5f65f2e9c1112cc103095164544a728 \ No newline at end of file diff --git a/llama-hf/30B/model-00006-of-00007.safetensors b/llama-hf/30B/model-00006-of-00007.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..471fe81fe7cb5d6e4fbe75d5a94617bae5a7fbeb --- /dev/null +++ b/llama-hf/30B/model-00006-of-00007.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/z2/P3/SHA256E-s9958081304--b42f41c7669d00e92b81f872a83da78a4071341b4dc4a79c71fb430daf8961b6/SHA256E-s9958081304--b42f41c7669d00e92b81f872a83da78a4071341b4dc4a79c71fb430daf8961b6 \ No newline at end of file diff --git a/llama-hf/30B/model-00007-of-00007.safetensors 
b/llama-hf/30B/model-00007-of-00007.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..442956e810e4f49f291ee137a335b78dd856966a --- /dev/null +++ b/llama-hf/30B/model-00007-of-00007.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/Z4/12/SHA256E-s5687891898--5ebce4cb65483b47d36f85d048c03b1126dd8fbf9bc5dbf6c3f72bf217aef505/SHA256E-s5687891898--5ebce4cb65483b47d36f85d048c03b1126dd8fbf9bc5dbf6c3f72bf217aef505 \ No newline at end of file diff --git a/llama-hf/30B/model.safetensors.index.json b/llama-hf/30B/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..90259b92c3b66f151db2c55eff9f6ba7ae94a000 --- /dev/null +++ b/llama-hf/30B/model.safetensors.index.json @@ -0,0 +1,610 @@ +{ + "metadata": { + "total_size": 65057902592 + }, + "weight_map": { + "lm_head.weight": "model-00007-of-00007.safetensors", + "model.embed_tokens.weight": "model-00001-of-00007.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.rotary_emb.inv_freq": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.rotary_emb.inv_freq": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.10.self_attn.rotary_emb.inv_freq": "model-00002-of-00007.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00007.safetensors", +
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.11.self_attn.rotary_emb.inv_freq": "model-00002-of-00007.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.12.self_attn.rotary_emb.inv_freq": "model-00002-of-00007.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.13.self_attn.rotary_emb.inv_freq": "model-00002-of-00007.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.14.self_attn.rotary_emb.inv_freq": "model-00002-of-00007.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.15.self_attn.k_proj.weight": 
"model-00002-of-00007.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.15.self_attn.rotary_emb.inv_freq": "model-00002-of-00007.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.16.self_attn.rotary_emb.inv_freq": "model-00002-of-00007.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.17.self_attn.rotary_emb.inv_freq": "model-00002-of-00007.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.18.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.18.self_attn.rotary_emb.inv_freq": "model-00003-of-00007.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.19.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.19.self_attn.rotary_emb.inv_freq": "model-00003-of-00007.safetensors", + 
"model.layers.19.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.rotary_emb.inv_freq": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.20.self_attn.rotary_emb.inv_freq": "model-00003-of-00007.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.21.self_attn.rotary_emb.inv_freq": "model-00003-of-00007.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.22.self_attn.rotary_emb.inv_freq": "model-00003-of-00007.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + 
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.23.self_attn.rotary_emb.inv_freq": "model-00003-of-00007.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.24.self_attn.rotary_emb.inv_freq": "model-00003-of-00007.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.25.self_attn.rotary_emb.inv_freq": "model-00003-of-00007.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.26.self_attn.rotary_emb.inv_freq": "model-00003-of-00007.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.27.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.27.self_attn.o_proj.weight": 
"model-00003-of-00007.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.27.self_attn.rotary_emb.inv_freq": "model-00003-of-00007.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.28.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.28.self_attn.rotary_emb.inv_freq": "model-00004-of-00007.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.29.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.29.self_attn.rotary_emb.inv_freq": "model-00004-of-00007.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.rotary_emb.inv_freq": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.30.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.30.self_attn.rotary_emb.inv_freq": "model-00004-of-00007.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + 
"model.layers.31.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.31.self_attn.rotary_emb.inv_freq": "model-00004-of-00007.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.32.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.32.self_attn.rotary_emb.inv_freq": "model-00004-of-00007.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.33.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.33.self_attn.rotary_emb.inv_freq": "model-00004-of-00007.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.34.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.34.self_attn.rotary_emb.inv_freq": "model-00004-of-00007.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.35.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.35.mlp.up_proj.weight": 
"model-00004-of-00007.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.35.self_attn.rotary_emb.inv_freq": "model-00004-of-00007.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.36.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.36.self_attn.rotary_emb.inv_freq": "model-00004-of-00007.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.37.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.37.self_attn.rotary_emb.inv_freq": "model-00005-of-00007.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.38.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.38.self_attn.rotary_emb.inv_freq": "model-00005-of-00007.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.39.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + 
"model.layers.39.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.39.self_attn.rotary_emb.inv_freq": "model-00005-of-00007.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.4.self_attn.rotary_emb.inv_freq": "model-00001-of-00007.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.40.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.40.self_attn.rotary_emb.inv_freq": "model-00005-of-00007.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.41.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.41.self_attn.rotary_emb.inv_freq": "model-00005-of-00007.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.42.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.42.self_attn.rotary_emb.inv_freq": "model-00005-of-00007.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.43.input_layernorm.weight": 
"model-00005-of-00007.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.43.self_attn.rotary_emb.inv_freq": "model-00005-of-00007.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.44.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.44.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.44.self_attn.rotary_emb.inv_freq": "model-00005-of-00007.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.45.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.45.self_attn.rotary_emb.inv_freq": "model-00005-of-00007.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.46.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.46.self_attn.rotary_emb.inv_freq": "model-00006-of-00007.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.47.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + 
"model.layers.47.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.47.self_attn.rotary_emb.inv_freq": "model-00006-of-00007.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.48.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.48.self_attn.rotary_emb.inv_freq": "model-00006-of-00007.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.49.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.49.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.49.self_attn.rotary_emb.inv_freq": "model-00006-of-00007.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.5.self_attn.rotary_emb.inv_freq": "model-00001-of-00007.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.50.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.50.self_attn.q_proj.weight": 
"model-00006-of-00007.safetensors", + "model.layers.50.self_attn.rotary_emb.inv_freq": "model-00006-of-00007.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.51.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.51.self_attn.rotary_emb.inv_freq": "model-00006-of-00007.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.52.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.52.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.52.self_attn.rotary_emb.inv_freq": "model-00006-of-00007.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.53.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.53.self_attn.rotary_emb.inv_freq": "model-00006-of-00007.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.54.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.54.self_attn.rotary_emb.inv_freq": "model-00006-of-00007.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.55.input_layernorm.weight": "model-00007-of-00007.safetensors", + 
"model.layers.55.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.55.self_attn.rotary_emb.inv_freq": "model-00007-of-00007.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.56.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.56.self_attn.rotary_emb.inv_freq": "model-00007-of-00007.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.57.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.57.self_attn.rotary_emb.inv_freq": "model-00007-of-00007.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.58.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.58.self_attn.rotary_emb.inv_freq": "model-00007-of-00007.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.59.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.59.post_attention_layernorm.weight": 
"model-00007-of-00007.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.59.self_attn.rotary_emb.inv_freq": "model-00007-of-00007.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.6.self_attn.rotary_emb.inv_freq": "model-00001-of-00007.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.7.self_attn.rotary_emb.inv_freq": "model-00001-of-00007.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.8.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.8.self_attn.rotary_emb.inv_freq": "model-00001-of-00007.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.self_attn.rotary_emb.inv_freq": 
"model-00002-of-00007.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.norm.weight": "model-00007-of-00007.safetensors" + } +} \ No newline at end of file diff --git a/llama-hf/30B/pytorch_model.bin.index.json b/llama-hf/30B/pytorch_model.bin.index.json new file mode 100644 index 0000000000000000000000000000000000000000..c76e80de8e9a4f0c983114aa75dc463c773b63e6 --- /dev/null +++ b/llama-hf/30B/pytorch_model.bin.index.json @@ -0,0 +1,610 @@ +{ + "metadata": { + "total_size": 65057902592 + }, + "weight_map": { + "lm_head.weight": "pytorch_model-00007-of-00007.bin", + "model.embed_tokens.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00007.bin", + "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00007.bin", + "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.10.input_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.10.mlp.down_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.10.mlp.up_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00007.bin", + "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.11.input_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.11.mlp.down_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.11.mlp.up_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.11.self_attn.k_proj.weight": 
"pytorch_model-00002-of-00007.bin", + "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00007.bin", + "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.12.input_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.12.mlp.down_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.12.mlp.up_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00007.bin", + "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.13.input_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.13.mlp.down_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.13.mlp.up_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00007.bin", + "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.14.input_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.14.mlp.down_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.14.mlp.up_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00007.bin", + "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.15.input_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.15.mlp.down_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.15.mlp.up_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00007.bin", + 
"model.layers.15.self_attn.v_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.16.input_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.16.mlp.down_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.16.mlp.up_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00007.bin", + "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.17.input_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.17.mlp.down_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.17.mlp.up_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00007.bin", + "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.18.input_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.18.mlp.down_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.18.mlp.up_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00007.bin", + "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.19.input_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.19.mlp.down_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.19.mlp.up_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00007.bin", + "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.2.mlp.gate_proj.weight": 
"pytorch_model-00001-of-00007.bin", + "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00007.bin", + "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.20.input_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.20.mlp.down_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.20.mlp.up_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00007.bin", + "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.21.input_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.21.mlp.down_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.21.mlp.up_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00007.bin", + "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.22.input_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.22.mlp.down_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.22.mlp.up_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00007.bin", + "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.23.input_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.23.mlp.down_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.23.mlp.up_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00003-of-00007.bin", + 
"model.layers.23.self_attn.o_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00007.bin", + "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.24.input_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.24.mlp.down_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.24.mlp.up_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00007.bin", + "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.25.input_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.25.mlp.down_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.25.mlp.up_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00007.bin", + "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.26.input_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.26.mlp.down_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.26.mlp.up_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00007.bin", + "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.27.input_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.27.mlp.down_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.27.mlp.up_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00003-of-00007.bin", + "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00007.bin", + "model.layers.27.self_attn.v_proj.weight": 
"pytorch_model-00003-of-00007.bin", + "model.layers.28.input_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.28.mlp.down_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.28.mlp.up_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00007.bin", + "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.29.input_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.29.mlp.down_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.29.mlp.up_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00007.bin", + "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00007.bin", + "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.30.input_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.30.mlp.down_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.30.mlp.up_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00007.bin", + "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.31.input_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.31.mlp.down_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.31.mlp.up_proj.weight": 
"pytorch_model-00004-of-00007.bin", + "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00007.bin", + "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.32.input_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.32.mlp.down_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.32.mlp.gate_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.32.mlp.up_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.32.post_attention_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.32.self_attn.k_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.32.self_attn.o_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.32.self_attn.q_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.32.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00007.bin", + "model.layers.32.self_attn.v_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.33.input_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.33.mlp.down_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.33.mlp.gate_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.33.mlp.up_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.33.post_attention_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.33.self_attn.k_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.33.self_attn.o_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.33.self_attn.q_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.33.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00007.bin", + "model.layers.33.self_attn.v_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.34.input_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.34.mlp.down_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.34.mlp.gate_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.34.mlp.up_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.34.post_attention_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.34.self_attn.k_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.34.self_attn.o_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.34.self_attn.q_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.34.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00007.bin", + "model.layers.34.self_attn.v_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.35.input_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.35.mlp.down_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.35.mlp.gate_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.35.mlp.up_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.35.post_attention_layernorm.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.35.self_attn.k_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.35.self_attn.o_proj.weight": "pytorch_model-00004-of-00007.bin", + 
"model.layers.35.self_attn.q_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.35.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00007.bin", + "model.layers.35.self_attn.v_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.36.input_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.36.mlp.down_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.36.mlp.gate_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.36.mlp.up_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.36.post_attention_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.36.self_attn.k_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.36.self_attn.o_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.36.self_attn.q_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.36.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00007.bin", + "model.layers.36.self_attn.v_proj.weight": "pytorch_model-00004-of-00007.bin", + "model.layers.37.input_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.37.mlp.down_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.37.mlp.gate_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.37.mlp.up_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.37.post_attention_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.37.self_attn.k_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.37.self_attn.o_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.37.self_attn.q_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.37.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00007.bin", + "model.layers.37.self_attn.v_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.38.input_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.38.mlp.down_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.38.mlp.gate_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.38.mlp.up_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.38.post_attention_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.38.self_attn.k_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.38.self_attn.o_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.38.self_attn.q_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.38.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00007.bin", + "model.layers.38.self_attn.v_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.39.input_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.39.mlp.down_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.39.mlp.gate_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.39.mlp.up_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.39.post_attention_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.39.self_attn.k_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.39.self_attn.o_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.39.self_attn.q_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.39.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00007.bin", + "model.layers.39.self_attn.v_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.4.input_layernorm.weight": 
"pytorch_model-00001-of-00007.bin", + "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00007.bin", + "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.40.input_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.40.mlp.down_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.40.mlp.gate_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.40.mlp.up_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.40.post_attention_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.40.self_attn.k_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.40.self_attn.o_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.40.self_attn.q_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.40.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00007.bin", + "model.layers.40.self_attn.v_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.41.input_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.41.mlp.down_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.41.mlp.gate_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.41.mlp.up_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.41.post_attention_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.41.self_attn.k_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.41.self_attn.o_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.41.self_attn.q_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.41.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00007.bin", + "model.layers.41.self_attn.v_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.42.input_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.42.mlp.down_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.42.mlp.gate_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.42.mlp.up_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.42.post_attention_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.42.self_attn.k_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.42.self_attn.o_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.42.self_attn.q_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.42.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00007.bin", + "model.layers.42.self_attn.v_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.43.input_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.43.mlp.down_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.43.mlp.gate_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.43.mlp.up_proj.weight": "pytorch_model-00005-of-00007.bin", + 
"model.layers.43.post_attention_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.43.self_attn.k_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.43.self_attn.o_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.43.self_attn.q_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.43.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00007.bin", + "model.layers.43.self_attn.v_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.44.input_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.44.mlp.down_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.44.mlp.gate_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.44.mlp.up_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.44.post_attention_layernorm.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.44.self_attn.k_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.44.self_attn.o_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.44.self_attn.q_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.44.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00007.bin", + "model.layers.44.self_attn.v_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.45.input_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.45.mlp.down_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.45.mlp.gate_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.45.mlp.up_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.45.post_attention_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.45.self_attn.k_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.45.self_attn.o_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.45.self_attn.q_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.45.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00007.bin", + "model.layers.45.self_attn.v_proj.weight": "pytorch_model-00005-of-00007.bin", + "model.layers.46.input_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.46.mlp.down_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.46.mlp.gate_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.46.mlp.up_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.46.post_attention_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.46.self_attn.k_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.46.self_attn.o_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.46.self_attn.q_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.46.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00007.bin", + "model.layers.46.self_attn.v_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.47.input_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.47.mlp.down_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.47.mlp.gate_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.47.mlp.up_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.47.post_attention_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.47.self_attn.k_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.47.self_attn.o_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.47.self_attn.q_proj.weight": 
"pytorch_model-00006-of-00007.bin", + "model.layers.47.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00007.bin", + "model.layers.47.self_attn.v_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.48.input_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.48.mlp.down_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.48.mlp.gate_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.48.mlp.up_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.48.post_attention_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.48.self_attn.k_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.48.self_attn.o_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.48.self_attn.q_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.48.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00007.bin", + "model.layers.48.self_attn.v_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.49.input_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.49.mlp.down_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.49.mlp.gate_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.49.mlp.up_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.49.post_attention_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.49.self_attn.k_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.49.self_attn.o_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.49.self_attn.q_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.49.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00007.bin", + "model.layers.49.self_attn.v_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00007.bin", + "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.50.input_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.50.mlp.down_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.50.mlp.gate_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.50.mlp.up_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.50.post_attention_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.50.self_attn.k_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.50.self_attn.o_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.50.self_attn.q_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.50.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00007.bin", + "model.layers.50.self_attn.v_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.51.input_layernorm.weight": "pytorch_model-00006-of-00007.bin", + 
"model.layers.51.mlp.down_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.51.mlp.gate_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.51.mlp.up_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.51.post_attention_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.51.self_attn.k_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.51.self_attn.o_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.51.self_attn.q_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.51.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00007.bin", + "model.layers.51.self_attn.v_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.52.input_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.52.mlp.down_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.52.mlp.gate_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.52.mlp.up_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.52.post_attention_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.52.self_attn.k_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.52.self_attn.o_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.52.self_attn.q_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.52.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00007.bin", + "model.layers.52.self_attn.v_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.53.input_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.53.mlp.down_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.53.mlp.gate_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.53.mlp.up_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.53.post_attention_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.53.self_attn.k_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.53.self_attn.o_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.53.self_attn.q_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.53.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00007.bin", + "model.layers.53.self_attn.v_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.54.input_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.54.mlp.down_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.54.mlp.gate_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.54.mlp.up_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.54.post_attention_layernorm.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.54.self_attn.k_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.54.self_attn.o_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.54.self_attn.q_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.54.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00007.bin", + "model.layers.54.self_attn.v_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.55.input_layernorm.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.55.mlp.down_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.55.mlp.gate_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.55.mlp.up_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.55.post_attention_layernorm.weight": 
"pytorch_model-00007-of-00007.bin", + "model.layers.55.self_attn.k_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.55.self_attn.o_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.55.self_attn.q_proj.weight": "pytorch_model-00006-of-00007.bin", + "model.layers.55.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00007.bin", + "model.layers.55.self_attn.v_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.56.input_layernorm.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.56.mlp.down_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.56.mlp.gate_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.56.mlp.up_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.56.post_attention_layernorm.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.56.self_attn.k_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.56.self_attn.o_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.56.self_attn.q_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.56.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00007.bin", + "model.layers.56.self_attn.v_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.57.input_layernorm.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.57.mlp.down_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.57.mlp.gate_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.57.mlp.up_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.57.post_attention_layernorm.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.57.self_attn.k_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.57.self_attn.o_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.57.self_attn.q_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.57.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00007.bin", + "model.layers.57.self_attn.v_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.58.input_layernorm.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.58.mlp.down_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.58.mlp.gate_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.58.mlp.up_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.58.post_attention_layernorm.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.58.self_attn.k_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.58.self_attn.o_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.58.self_attn.q_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.58.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00007.bin", + "model.layers.58.self_attn.v_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.59.input_layernorm.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.59.mlp.down_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.59.mlp.gate_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.59.mlp.up_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.59.post_attention_layernorm.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.59.self_attn.k_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.59.self_attn.o_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.59.self_attn.q_proj.weight": "pytorch_model-00007-of-00007.bin", + 
"model.layers.59.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00007.bin", + "model.layers.59.self_attn.v_proj.weight": "pytorch_model-00007-of-00007.bin", + "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00007.bin", + "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00007.bin", + "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.8.input_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.8.mlp.up_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00007.bin", + "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00007.bin", + "model.layers.9.input_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.9.mlp.down_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.9.mlp.up_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00007.bin", + "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00002-of-00007.bin", + "model.norm.weight": "pytorch_model-00007-of-00007.bin" + } +} diff --git a/llama-hf/30B/special_tokens_map.json b/llama-hf/30B/special_tokens_map.json new file mode 100644 index 
0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/llama-hf/30B/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/llama-hf/30B/tokenizer.model b/llama-hf/30B/tokenizer.model new file mode 120000 index 0000000000000000000000000000000000000000..16479c4d5ef98c12b179e1557bb3d4039757d83b --- /dev/null +++ b/llama-hf/30B/tokenizer.model @@ -0,0 +1 @@ +../../.git/annex/objects/m9/qj/SHA256E-s499723--9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347/SHA256E-s499723--9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 \ No newline at end of file diff --git a/llama-hf/30B/tokenizer_config.json b/llama-hf/30B/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a54b01aa3699f19e1aea416fc337f910f60c6839 --- /dev/null +++ b/llama-hf/30B/tokenizer_config.json @@ -0,0 +1 @@ +{"bos_token": "", "eos_token": "", "model_max_length": 1000000000000000019884624838656, "tokenizer_class": "LlamaTokenizer", "unk_token": ""} \ No newline at end of file diff --git a/llama-hf/65B/config.json b/llama-hf/65B/config.json new file mode 100644 index 0000000000000000000000000000000000000000..22cd97fce62cdd936e3b03c9c21a92a6f299ed57 --- /dev/null +++ b/llama-hf/65B/config.json @@ -0,0 +1,22 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": 22016, + "max_position_embeddings": 2048, + "model_type": "llama", + "num_attention_heads": 64, + "num_hidden_layers": 80, + "pad_token_id": 0, + "rms_norm_eps": 1e-05, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.28.0.dev0", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/llama-hf/65B/generation_config.json b/llama-hf/65B/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..55d7b5b6db760f8c1963be3d56a3bc363bacdfb1 --- /dev/null +++ b/llama-hf/65B/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "pad_token_id": 0, + "transformers_version": "4.28.0.dev0" +} diff --git a/llama-hf/65B/model-00001-of-00014.safetensors b/llama-hf/65B/model-00001-of-00014.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..068b7c63201ea4065f6fb5fb864ca0f2332b6c0f --- /dev/null +++ b/llama-hf/65B/model-00001-of-00014.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/q4/vp/SHA256E-s9877757900--cae08880e5397b47da7d6e20de892c843a7726c82d0e813da77621169e06984e/SHA256E-s9877757900--cae08880e5397b47da7d6e20de892c843a7726c82d0e813da77621169e06984e \ No newline at end of file diff --git a/llama-hf/65B/model-00002-of-00014.safetensors b/llama-hf/65B/model-00002-of-00014.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..005195cccc378302f60c8b39616191e5bb53e5da --- /dev/null +++ b/llama-hf/65B/model-00002-of-00014.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/vX/W5/SHA256E-s9714213056--2d2484879634cd1c9b79ab12b1952481e59381523a4fecbea306432660c2631e/SHA256E-s9714213056--2d2484879634cd1c9b79ab12b1952481e59381523a4fecbea306432660c2631e \ No newline at end of file diff --git a/llama-hf/65B/model-00003-of-00014.safetensors b/llama-hf/65B/model-00003-of-00014.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..53b60a2c8b28f14622bc2458e88d3926676854e6 --- /dev/null +++ 
b/llama-hf/65B/model-00003-of-00014.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/G9/v7/SHA256E-s9714213094--e33e150132b4faa28eeddbb74460e36bcf7355475cdfbf6169fc20aae7c3b57d/SHA256E-s9714213094--e33e150132b4faa28eeddbb74460e36bcf7355475cdfbf6169fc20aae7c3b57d \ No newline at end of file diff --git a/llama-hf/65B/model-00004-of-00014.safetensors b/llama-hf/65B/model-00004-of-00014.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..06ce119a0d1933642d1f00b3947ef485cfa95a4a --- /dev/null +++ b/llama-hf/65B/model-00004-of-00014.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/Gq/0K/SHA256E-s9714213094--ee55c6f24ece37f55a78bf67a221f7b2be3559439581f62b73f52472f4f2b50b/SHA256E-s9714213094--ee55c6f24ece37f55a78bf67a221f7b2be3559439581f62b73f52472f4f2b50b \ No newline at end of file diff --git a/llama-hf/65B/model-00005-of-00014.safetensors b/llama-hf/65B/model-00005-of-00014.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..e94f6a6979a9f719412cadb0300a79dfc777c823 --- /dev/null +++ b/llama-hf/65B/model-00005-of-00014.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/5G/Pm/SHA256E-s9714213094--4b82751d4563f04d49eac2239df88d131ef45518662413c198c864ffb00fecca/SHA256E-s9714213094--4b82751d4563f04d49eac2239df88d131ef45518662413c198c864ffb00fecca \ No newline at end of file diff --git a/llama-hf/65B/model-00006-of-00014.safetensors b/llama-hf/65B/model-00006-of-00014.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..3b9d870deb5bbf5c9cbe9734ac5a2448eb427350 --- /dev/null +++ b/llama-hf/65B/model-00006-of-00014.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/q1/0W/SHA256E-s9714213094--7c71e72849a87cbc4d83148575a8509d2583e9f8ab3c02a434e5d48e3fc5f8d3/SHA256E-s9714213094--7c71e72849a87cbc4d83148575a8509d2583e9f8ab3c02a434e5d48e3fc5f8d3 \ No newline at end of file diff --git a/llama-hf/65B/model-00007-of-00014.safetensors b/llama-hf/65B/model-00007-of-00014.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..90e896d24dc29ad2172e2dabf984ada46289dea6 --- /dev/null +++ b/llama-hf/65B/model-00007-of-00014.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/MW/22/SHA256E-s9714213094--71b83757f30a27b71a697a760ceed521465e339f144c91371d16fe8a526ba59f/SHA256E-s9714213094--71b83757f30a27b71a697a760ceed521465e339f144c91371d16fe8a526ba59f \ No newline at end of file diff --git a/llama-hf/65B/model-00008-of-00014.safetensors b/llama-hf/65B/model-00008-of-00014.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..085f2d4b602c077a23b6a6d105045432cfac7aae --- /dev/null +++ b/llama-hf/65B/model-00008-of-00014.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/P7/4V/SHA256E-s9714213094--c79ede3faabed89a6dd63638b3322f1f00e404cf5f99e47a5c7343d5cb916fa9/SHA256E-s9714213094--c79ede3faabed89a6dd63638b3322f1f00e404cf5f99e47a5c7343d5cb916fa9 \ No newline at end of file diff --git a/llama-hf/65B/model-00009-of-00014.safetensors b/llama-hf/65B/model-00009-of-00014.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..29a43ab07f0fda0b598fbbb8d2e5adca8cb457a4 --- /dev/null +++ b/llama-hf/65B/model-00009-of-00014.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/06/24/SHA256E-s9714213094--76dfb8294513314b0b7d706622009a3fb0a0c60e36aee8d4ad39ba4712eb0923/SHA256E-s9714213094--76dfb8294513314b0b7d706622009a3fb0a0c60e36aee8d4ad39ba4712eb0923 \ No newline at end of file diff --git a/llama-hf/65B/model-00010-of-00014.safetensors 
b/llama-hf/65B/model-00010-of-00014.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..4ad65e3a319ac293c8e49cfa66057afcda718192 --- /dev/null +++ b/llama-hf/65B/model-00010-of-00014.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/8X/54/SHA256E-s9714213094--e429525609422608e4f1dd7b6e776072ac7342ccab830c78dedf24e41b9cdf45/SHA256E-s9714213094--e429525609422608e4f1dd7b6e776072ac7342ccab830c78dedf24e41b9cdf45 \ No newline at end of file diff --git a/llama-hf/65B/model-00011-of-00014.safetensors b/llama-hf/65B/model-00011-of-00014.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..ca6b77722d2a8b9b9b5814018b7f752f8567e0fe --- /dev/null +++ b/llama-hf/65B/model-00011-of-00014.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/14/m8/SHA256E-s9714213094--7d766a434b1bb7e0bad93bfabdefc6437240647c1a1d356814d80af8fea862a4/SHA256E-s9714213094--7d766a434b1bb7e0bad93bfabdefc6437240647c1a1d356814d80af8fea862a4 \ No newline at end of file diff --git a/llama-hf/65B/model-00012-of-00014.safetensors b/llama-hf/65B/model-00012-of-00014.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..5a87f7f1d3baef2d4a05be956c0b4545c86ca607 --- /dev/null +++ b/llama-hf/65B/model-00012-of-00014.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/vk/13/SHA256E-s9714213094--3c1bec00953659bc1304cf6fa57b8bf9e4fbd9038aebca716de31a55ad6803ce/SHA256E-s9714213094--3c1bec00953659bc1304cf6fa57b8bf9e4fbd9038aebca716de31a55ad6803ce \ No newline at end of file diff --git a/llama-hf/65B/model-00013-of-00014.safetensors b/llama-hf/65B/model-00013-of-00014.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..42e57f86c81437842e9e293823c554db65285439 --- /dev/null +++ b/llama-hf/65B/model-00013-of-00014.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/Qx/4f/SHA256E-s9714213094--a3b2b4f31afe6b1823bb6b10ecf54ca173ea28c4d3d5615a3710ed430f7ae497/SHA256E-s9714213094--a3b2b4f31afe6b1823bb6b10ecf54ca173ea28c4d3d5615a3710ed430f7ae497 \ No newline at end of file diff --git a/llama-hf/65B/model-00014-of-00014.safetensors b/llama-hf/65B/model-00014-of-00014.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..f9998142ffeb9f9be33c479d442ed9add9f2a361 --- /dev/null +++ b/llama-hf/65B/model-00014-of-00014.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/6m/zx/SHA256E-s4123118872--dc9ff981164320e3d689b97caa362b760ec7ac45c631ce5ad6db7b89e12ba0f0/SHA256E-s4123118872--dc9ff981164320e3d689b97caa362b760ec7ac45c631ce5ad6db7b89e12ba0f0 \ No newline at end of file diff --git a/llama-hf/65B/model.safetensors.index.json b/llama-hf/65B/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..44af90fcf76369b9529791bc7ed2327625f6cd91 --- /dev/null +++ b/llama-hf/65B/model.safetensors.index.json @@ -0,0 +1,810 @@ +{ + "metadata": { + "total_size": 130571341824 + }, + "weight_map": { + "lm_head.weight": "model-00014-of-00014.safetensors", + "model.embed_tokens.weight": "model-00001-of-00014.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00014.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00014.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00014.safetensors", + 
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.0.self_attn.rotary_emb.inv_freq": "model-00001-of-00014.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00014.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00014.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.1.self_attn.rotary_emb.inv_freq": "model-00001-of-00014.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00014.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00014.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.10.self_attn.rotary_emb.inv_freq": "model-00002-of-00014.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00014.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00014.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.11.self_attn.rotary_emb.inv_freq": "model-00002-of-00014.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00014.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00014.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.12.self_attn.rotary_emb.inv_freq": "model-00003-of-00014.safetensors", + "model.layers.12.self_attn.v_proj.weight": 
"model-00003-of-00014.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00014.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00014.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.13.self_attn.rotary_emb.inv_freq": "model-00003-of-00014.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.14.input_layernorm.weight": "model-00003-of-00014.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00014.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.14.self_attn.rotary_emb.inv_freq": "model-00003-of-00014.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.15.input_layernorm.weight": "model-00003-of-00014.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00014.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.15.self_attn.rotary_emb.inv_freq": "model-00003-of-00014.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.16.input_layernorm.weight": "model-00003-of-00014.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00014.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.16.self_attn.rotary_emb.inv_freq": "model-00003-of-00014.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.17.input_layernorm.weight": "model-00004-of-00014.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00003-of-00014.safetensors", + 
"model.layers.17.mlp.up_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00014.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.17.self_attn.rotary_emb.inv_freq": "model-00003-of-00014.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00014.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00014.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00014.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.18.self_attn.rotary_emb.inv_freq": "model-00004-of-00014.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00014.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00014.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.19.self_attn.rotary_emb.inv_freq": "model-00004-of-00014.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00014.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00014.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.2.self_attn.rotary_emb.inv_freq": "model-00001-of-00014.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.20.input_layernorm.weight": "model-00004-of-00014.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00004-of-00014.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.20.self_attn.o_proj.weight": 
"model-00004-of-00014.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.20.self_attn.rotary_emb.inv_freq": "model-00004-of-00014.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.21.input_layernorm.weight": "model-00004-of-00014.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00004-of-00014.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.21.self_attn.rotary_emb.inv_freq": "model-00004-of-00014.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.22.input_layernorm.weight": "model-00004-of-00014.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00004-of-00014.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.22.self_attn.rotary_emb.inv_freq": "model-00004-of-00014.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.23.input_layernorm.weight": "model-00005-of-00014.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00014.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.23.self_attn.rotary_emb.inv_freq": "model-00004-of-00014.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00004-of-00014.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00014.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00014.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.24.self_attn.rotary_emb.inv_freq": "model-00005-of-00014.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00014.safetensors", + 
"model.layers.25.input_layernorm.weight": "model-00005-of-00014.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00005-of-00014.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.25.self_attn.rotary_emb.inv_freq": "model-00005-of-00014.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.26.input_layernorm.weight": "model-00005-of-00014.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00005-of-00014.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.26.self_attn.rotary_emb.inv_freq": "model-00005-of-00014.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.27.input_layernorm.weight": "model-00005-of-00014.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00005-of-00014.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.27.self_attn.rotary_emb.inv_freq": "model-00005-of-00014.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.28.input_layernorm.weight": "model-00005-of-00014.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00005-of-00014.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.28.self_attn.rotary_emb.inv_freq": "model-00005-of-00014.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.29.input_layernorm.weight": "model-00006-of-00014.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.29.mlp.up_proj.weight": 
"model-00006-of-00014.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00014.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.29.self_attn.rotary_emb.inv_freq": "model-00005-of-00014.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00005-of-00014.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00014.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00014.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.3.self_attn.rotary_emb.inv_freq": "model-00001-of-00014.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00014.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00014.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.30.self_attn.rotary_emb.inv_freq": "model-00006-of-00014.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.31.input_layernorm.weight": "model-00006-of-00014.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00006-of-00014.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.31.self_attn.rotary_emb.inv_freq": "model-00006-of-00014.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.32.input_layernorm.weight": "model-00006-of-00014.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00006-of-00014.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00006-of-00014.safetensors", + 
"model.layers.32.self_attn.q_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.32.self_attn.rotary_emb.inv_freq": "model-00006-of-00014.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.33.input_layernorm.weight": "model-00006-of-00014.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00006-of-00014.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.33.self_attn.rotary_emb.inv_freq": "model-00006-of-00014.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.34.input_layernorm.weight": "model-00006-of-00014.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00006-of-00014.safetensors", + "model.layers.34.self_attn.k_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.34.self_attn.rotary_emb.inv_freq": "model-00006-of-00014.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.35.input_layernorm.weight": "model-00007-of-00014.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00007-of-00014.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.35.self_attn.rotary_emb.inv_freq": "model-00006-of-00014.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00006-of-00014.safetensors", + "model.layers.36.input_layernorm.weight": "model-00007-of-00014.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00007-of-00014.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.36.self_attn.q_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.36.self_attn.rotary_emb.inv_freq": "model-00007-of-00014.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.37.input_layernorm.weight": 
"model-00007-of-00014.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00007-of-00014.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.37.self_attn.rotary_emb.inv_freq": "model-00007-of-00014.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.38.input_layernorm.weight": "model-00007-of-00014.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00007-of-00014.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.38.self_attn.rotary_emb.inv_freq": "model-00007-of-00014.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.39.input_layernorm.weight": "model-00007-of-00014.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00007-of-00014.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.39.self_attn.rotary_emb.inv_freq": "model-00007-of-00014.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00014.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00014.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.4.self_attn.rotary_emb.inv_freq": "model-00001-of-00014.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.40.input_layernorm.weight": "model-00007-of-00014.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.40.mlp.up_proj.weight": "model-00007-of-00014.safetensors", + 
"model.layers.40.post_attention_layernorm.weight": "model-00007-of-00014.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.40.self_attn.rotary_emb.inv_freq": "model-00007-of-00014.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.41.input_layernorm.weight": "model-00008-of-00014.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00008-of-00014.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.41.self_attn.rotary_emb.inv_freq": "model-00007-of-00014.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00007-of-00014.safetensors", + "model.layers.42.input_layernorm.weight": "model-00008-of-00014.safetensors", + "model.layers.42.mlp.down_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.42.mlp.gate_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.42.mlp.up_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.42.post_attention_layernorm.weight": "model-00008-of-00014.safetensors", + "model.layers.42.self_attn.k_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.42.self_attn.o_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.42.self_attn.q_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.42.self_attn.rotary_emb.inv_freq": "model-00008-of-00014.safetensors", + "model.layers.42.self_attn.v_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.43.input_layernorm.weight": "model-00008-of-00014.safetensors", + "model.layers.43.mlp.down_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.43.mlp.gate_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.43.mlp.up_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.43.post_attention_layernorm.weight": "model-00008-of-00014.safetensors", + "model.layers.43.self_attn.k_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.43.self_attn.o_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.43.self_attn.q_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.43.self_attn.rotary_emb.inv_freq": "model-00008-of-00014.safetensors", + "model.layers.43.self_attn.v_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.44.input_layernorm.weight": "model-00008-of-00014.safetensors", + "model.layers.44.mlp.down_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.44.mlp.gate_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.44.mlp.up_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.44.post_attention_layernorm.weight": "model-00008-of-00014.safetensors", + "model.layers.44.self_attn.k_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.44.self_attn.o_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.44.self_attn.q_proj.weight": 
"model-00008-of-00014.safetensors", + "model.layers.44.self_attn.rotary_emb.inv_freq": "model-00008-of-00014.safetensors", + "model.layers.44.self_attn.v_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.45.input_layernorm.weight": "model-00008-of-00014.safetensors", + "model.layers.45.mlp.down_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.45.mlp.gate_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.45.mlp.up_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.45.post_attention_layernorm.weight": "model-00008-of-00014.safetensors", + "model.layers.45.self_attn.k_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.45.self_attn.o_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.45.self_attn.q_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.45.self_attn.rotary_emb.inv_freq": "model-00008-of-00014.safetensors", + "model.layers.45.self_attn.v_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.46.input_layernorm.weight": "model-00008-of-00014.safetensors", + "model.layers.46.mlp.down_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.46.mlp.gate_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.46.mlp.up_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.46.post_attention_layernorm.weight": "model-00008-of-00014.safetensors", + "model.layers.46.self_attn.k_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.46.self_attn.o_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.46.self_attn.q_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.46.self_attn.rotary_emb.inv_freq": "model-00008-of-00014.safetensors", + "model.layers.46.self_attn.v_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.47.input_layernorm.weight": "model-00009-of-00014.safetensors", + "model.layers.47.mlp.down_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.47.mlp.gate_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.47.mlp.up_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.47.post_attention_layernorm.weight": "model-00009-of-00014.safetensors", + "model.layers.47.self_attn.k_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.47.self_attn.o_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.47.self_attn.q_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.47.self_attn.rotary_emb.inv_freq": "model-00008-of-00014.safetensors", + "model.layers.47.self_attn.v_proj.weight": "model-00008-of-00014.safetensors", + "model.layers.48.input_layernorm.weight": "model-00009-of-00014.safetensors", + "model.layers.48.mlp.down_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.48.mlp.gate_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.48.mlp.up_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.48.post_attention_layernorm.weight": "model-00009-of-00014.safetensors", + "model.layers.48.self_attn.k_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.48.self_attn.o_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.48.self_attn.q_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.48.self_attn.rotary_emb.inv_freq": "model-00009-of-00014.safetensors", + "model.layers.48.self_attn.v_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.49.input_layernorm.weight": "model-00009-of-00014.safetensors", + 
"model.layers.49.mlp.down_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.49.mlp.gate_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.49.mlp.up_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.49.post_attention_layernorm.weight": "model-00009-of-00014.safetensors", + "model.layers.49.self_attn.k_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.49.self_attn.o_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.49.self_attn.q_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.49.self_attn.rotary_emb.inv_freq": "model-00009-of-00014.safetensors", + "model.layers.49.self_attn.v_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00014.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00014.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.5.self_attn.rotary_emb.inv_freq": "model-00001-of-00014.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00014.safetensors", + "model.layers.50.input_layernorm.weight": "model-00009-of-00014.safetensors", + "model.layers.50.mlp.down_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.50.mlp.gate_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.50.mlp.up_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.50.post_attention_layernorm.weight": "model-00009-of-00014.safetensors", + "model.layers.50.self_attn.k_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.50.self_attn.o_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.50.self_attn.q_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.50.self_attn.rotary_emb.inv_freq": "model-00009-of-00014.safetensors", + "model.layers.50.self_attn.v_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.51.input_layernorm.weight": "model-00009-of-00014.safetensors", + "model.layers.51.mlp.down_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.51.mlp.gate_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.51.mlp.up_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.51.post_attention_layernorm.weight": "model-00009-of-00014.safetensors", + "model.layers.51.self_attn.k_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.51.self_attn.o_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.51.self_attn.q_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.51.self_attn.rotary_emb.inv_freq": "model-00009-of-00014.safetensors", + "model.layers.51.self_attn.v_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.52.input_layernorm.weight": "model-00009-of-00014.safetensors", + "model.layers.52.mlp.down_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.52.mlp.gate_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.52.mlp.up_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.52.post_attention_layernorm.weight": 
"model-00009-of-00014.safetensors", + "model.layers.52.self_attn.k_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.52.self_attn.o_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.52.self_attn.q_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.52.self_attn.rotary_emb.inv_freq": "model-00009-of-00014.safetensors", + "model.layers.52.self_attn.v_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.53.input_layernorm.weight": "model-00010-of-00014.safetensors", + "model.layers.53.mlp.down_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.53.mlp.gate_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.53.mlp.up_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.53.post_attention_layernorm.weight": "model-00010-of-00014.safetensors", + "model.layers.53.self_attn.k_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.53.self_attn.o_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.53.self_attn.q_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.53.self_attn.rotary_emb.inv_freq": "model-00009-of-00014.safetensors", + "model.layers.53.self_attn.v_proj.weight": "model-00009-of-00014.safetensors", + "model.layers.54.input_layernorm.weight": "model-00010-of-00014.safetensors", + "model.layers.54.mlp.down_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.54.mlp.gate_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.54.mlp.up_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.54.post_attention_layernorm.weight": "model-00010-of-00014.safetensors", + "model.layers.54.self_attn.k_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.54.self_attn.o_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.54.self_attn.q_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.54.self_attn.rotary_emb.inv_freq": "model-00010-of-00014.safetensors", + "model.layers.54.self_attn.v_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.55.input_layernorm.weight": "model-00010-of-00014.safetensors", + "model.layers.55.mlp.down_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.55.mlp.gate_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.55.mlp.up_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.55.post_attention_layernorm.weight": "model-00010-of-00014.safetensors", + "model.layers.55.self_attn.k_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.55.self_attn.o_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.55.self_attn.q_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.55.self_attn.rotary_emb.inv_freq": "model-00010-of-00014.safetensors", + "model.layers.55.self_attn.v_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.56.input_layernorm.weight": "model-00010-of-00014.safetensors", + "model.layers.56.mlp.down_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.56.mlp.gate_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.56.mlp.up_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.56.post_attention_layernorm.weight": "model-00010-of-00014.safetensors", + "model.layers.56.self_attn.k_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.56.self_attn.o_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.56.self_attn.q_proj.weight": "model-00010-of-00014.safetensors", + 
"model.layers.56.self_attn.rotary_emb.inv_freq": "model-00010-of-00014.safetensors", + "model.layers.56.self_attn.v_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.57.input_layernorm.weight": "model-00010-of-00014.safetensors", + "model.layers.57.mlp.down_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.57.mlp.gate_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.57.mlp.up_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.57.post_attention_layernorm.weight": "model-00010-of-00014.safetensors", + "model.layers.57.self_attn.k_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.57.self_attn.o_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.57.self_attn.q_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.57.self_attn.rotary_emb.inv_freq": "model-00010-of-00014.safetensors", + "model.layers.57.self_attn.v_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.58.input_layernorm.weight": "model-00010-of-00014.safetensors", + "model.layers.58.mlp.down_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.58.mlp.gate_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.58.mlp.up_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.58.post_attention_layernorm.weight": "model-00010-of-00014.safetensors", + "model.layers.58.self_attn.k_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.58.self_attn.o_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.58.self_attn.q_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.58.self_attn.rotary_emb.inv_freq": "model-00010-of-00014.safetensors", + "model.layers.58.self_attn.v_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.59.input_layernorm.weight": "model-00011-of-00014.safetensors", + "model.layers.59.mlp.down_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.59.mlp.gate_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.59.mlp.up_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.59.post_attention_layernorm.weight": "model-00011-of-00014.safetensors", + "model.layers.59.self_attn.k_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.59.self_attn.o_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.59.self_attn.q_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.59.self_attn.rotary_emb.inv_freq": "model-00010-of-00014.safetensors", + "model.layers.59.self_attn.v_proj.weight": "model-00010-of-00014.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00014.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00014.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.6.self_attn.rotary_emb.inv_freq": "model-00002-of-00014.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.60.input_layernorm.weight": "model-00011-of-00014.safetensors", + "model.layers.60.mlp.down_proj.weight": 
"model-00011-of-00014.safetensors", + "model.layers.60.mlp.gate_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.60.mlp.up_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.60.post_attention_layernorm.weight": "model-00011-of-00014.safetensors", + "model.layers.60.self_attn.k_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.60.self_attn.o_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.60.self_attn.q_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.60.self_attn.rotary_emb.inv_freq": "model-00011-of-00014.safetensors", + "model.layers.60.self_attn.v_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.61.input_layernorm.weight": "model-00011-of-00014.safetensors", + "model.layers.61.mlp.down_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.61.mlp.gate_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.61.mlp.up_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.61.post_attention_layernorm.weight": "model-00011-of-00014.safetensors", + "model.layers.61.self_attn.k_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.61.self_attn.o_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.61.self_attn.q_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.61.self_attn.rotary_emb.inv_freq": "model-00011-of-00014.safetensors", + "model.layers.61.self_attn.v_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.62.input_layernorm.weight": "model-00011-of-00014.safetensors", + "model.layers.62.mlp.down_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.62.mlp.gate_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.62.mlp.up_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.62.post_attention_layernorm.weight": "model-00011-of-00014.safetensors", + "model.layers.62.self_attn.k_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.62.self_attn.o_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.62.self_attn.q_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.62.self_attn.rotary_emb.inv_freq": "model-00011-of-00014.safetensors", + "model.layers.62.self_attn.v_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.63.input_layernorm.weight": "model-00011-of-00014.safetensors", + "model.layers.63.mlp.down_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.63.mlp.gate_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.63.mlp.up_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.63.post_attention_layernorm.weight": "model-00011-of-00014.safetensors", + "model.layers.63.self_attn.k_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.63.self_attn.o_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.63.self_attn.q_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.63.self_attn.rotary_emb.inv_freq": "model-00011-of-00014.safetensors", + "model.layers.63.self_attn.v_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.64.input_layernorm.weight": "model-00011-of-00014.safetensors", + "model.layers.64.mlp.down_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.64.mlp.gate_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.64.mlp.up_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.64.post_attention_layernorm.weight": "model-00011-of-00014.safetensors", + 
"model.layers.64.self_attn.k_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.64.self_attn.o_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.64.self_attn.q_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.64.self_attn.rotary_emb.inv_freq": "model-00011-of-00014.safetensors", + "model.layers.64.self_attn.v_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.65.input_layernorm.weight": "model-00012-of-00014.safetensors", + "model.layers.65.mlp.down_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.65.mlp.gate_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.65.mlp.up_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.65.post_attention_layernorm.weight": "model-00012-of-00014.safetensors", + "model.layers.65.self_attn.k_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.65.self_attn.o_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.65.self_attn.q_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.65.self_attn.rotary_emb.inv_freq": "model-00011-of-00014.safetensors", + "model.layers.65.self_attn.v_proj.weight": "model-00011-of-00014.safetensors", + "model.layers.66.input_layernorm.weight": "model-00012-of-00014.safetensors", + "model.layers.66.mlp.down_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.66.mlp.gate_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.66.mlp.up_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.66.post_attention_layernorm.weight": "model-00012-of-00014.safetensors", + "model.layers.66.self_attn.k_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.66.self_attn.o_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.66.self_attn.q_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.66.self_attn.rotary_emb.inv_freq": "model-00012-of-00014.safetensors", + "model.layers.66.self_attn.v_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.67.input_layernorm.weight": "model-00012-of-00014.safetensors", + "model.layers.67.mlp.down_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.67.mlp.gate_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.67.mlp.up_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.67.post_attention_layernorm.weight": "model-00012-of-00014.safetensors", + "model.layers.67.self_attn.k_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.67.self_attn.o_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.67.self_attn.q_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.67.self_attn.rotary_emb.inv_freq": "model-00012-of-00014.safetensors", + "model.layers.67.self_attn.v_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.68.input_layernorm.weight": "model-00012-of-00014.safetensors", + "model.layers.68.mlp.down_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.68.mlp.gate_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.68.mlp.up_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.68.post_attention_layernorm.weight": "model-00012-of-00014.safetensors", + "model.layers.68.self_attn.k_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.68.self_attn.o_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.68.self_attn.q_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.68.self_attn.rotary_emb.inv_freq": 
"model-00012-of-00014.safetensors", + "model.layers.68.self_attn.v_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.69.input_layernorm.weight": "model-00012-of-00014.safetensors", + "model.layers.69.mlp.down_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.69.mlp.gate_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.69.mlp.up_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.69.post_attention_layernorm.weight": "model-00012-of-00014.safetensors", + "model.layers.69.self_attn.k_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.69.self_attn.o_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.69.self_attn.q_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.69.self_attn.rotary_emb.inv_freq": "model-00012-of-00014.safetensors", + "model.layers.69.self_attn.v_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00014.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00014.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.7.self_attn.rotary_emb.inv_freq": "model-00002-of-00014.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.70.input_layernorm.weight": "model-00012-of-00014.safetensors", + "model.layers.70.mlp.down_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.70.mlp.gate_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.70.mlp.up_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.70.post_attention_layernorm.weight": "model-00012-of-00014.safetensors", + "model.layers.70.self_attn.k_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.70.self_attn.o_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.70.self_attn.q_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.70.self_attn.rotary_emb.inv_freq": "model-00012-of-00014.safetensors", + "model.layers.70.self_attn.v_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.71.input_layernorm.weight": "model-00013-of-00014.safetensors", + "model.layers.71.mlp.down_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.71.mlp.gate_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.71.mlp.up_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.71.post_attention_layernorm.weight": "model-00013-of-00014.safetensors", + "model.layers.71.self_attn.k_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.71.self_attn.o_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.71.self_attn.q_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.71.self_attn.rotary_emb.inv_freq": "model-00012-of-00014.safetensors", + "model.layers.71.self_attn.v_proj.weight": "model-00012-of-00014.safetensors", + "model.layers.72.input_layernorm.weight": "model-00013-of-00014.safetensors", + "model.layers.72.mlp.down_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.72.mlp.gate_proj.weight": 
"model-00013-of-00014.safetensors", + "model.layers.72.mlp.up_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.72.post_attention_layernorm.weight": "model-00013-of-00014.safetensors", + "model.layers.72.self_attn.k_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.72.self_attn.o_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.72.self_attn.q_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.72.self_attn.rotary_emb.inv_freq": "model-00013-of-00014.safetensors", + "model.layers.72.self_attn.v_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.73.input_layernorm.weight": "model-00013-of-00014.safetensors", + "model.layers.73.mlp.down_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.73.mlp.gate_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.73.mlp.up_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.73.post_attention_layernorm.weight": "model-00013-of-00014.safetensors", + "model.layers.73.self_attn.k_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.73.self_attn.o_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.73.self_attn.q_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.73.self_attn.rotary_emb.inv_freq": "model-00013-of-00014.safetensors", + "model.layers.73.self_attn.v_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.74.input_layernorm.weight": "model-00013-of-00014.safetensors", + "model.layers.74.mlp.down_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.74.mlp.gate_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.74.mlp.up_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.74.post_attention_layernorm.weight": "model-00013-of-00014.safetensors", + "model.layers.74.self_attn.k_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.74.self_attn.o_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.74.self_attn.q_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.74.self_attn.rotary_emb.inv_freq": "model-00013-of-00014.safetensors", + "model.layers.74.self_attn.v_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.75.input_layernorm.weight": "model-00013-of-00014.safetensors", + "model.layers.75.mlp.down_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.75.mlp.gate_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.75.mlp.up_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.75.post_attention_layernorm.weight": "model-00013-of-00014.safetensors", + "model.layers.75.self_attn.k_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.75.self_attn.o_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.75.self_attn.q_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.75.self_attn.rotary_emb.inv_freq": "model-00013-of-00014.safetensors", + "model.layers.75.self_attn.v_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.76.input_layernorm.weight": "model-00013-of-00014.safetensors", + "model.layers.76.mlp.down_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.76.mlp.gate_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.76.mlp.up_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.76.post_attention_layernorm.weight": "model-00013-of-00014.safetensors", + "model.layers.76.self_attn.k_proj.weight": "model-00013-of-00014.safetensors", + 
"model.layers.76.self_attn.o_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.76.self_attn.q_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.76.self_attn.rotary_emb.inv_freq": "model-00013-of-00014.safetensors", + "model.layers.76.self_attn.v_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.77.input_layernorm.weight": "model-00014-of-00014.safetensors", + "model.layers.77.mlp.down_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.77.mlp.gate_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.77.mlp.up_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.77.post_attention_layernorm.weight": "model-00014-of-00014.safetensors", + "model.layers.77.self_attn.k_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.77.self_attn.o_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.77.self_attn.q_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.77.self_attn.rotary_emb.inv_freq": "model-00013-of-00014.safetensors", + "model.layers.77.self_attn.v_proj.weight": "model-00013-of-00014.safetensors", + "model.layers.78.input_layernorm.weight": "model-00014-of-00014.safetensors", + "model.layers.78.mlp.down_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.78.mlp.gate_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.78.mlp.up_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.78.post_attention_layernorm.weight": "model-00014-of-00014.safetensors", + "model.layers.78.self_attn.k_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.78.self_attn.o_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.78.self_attn.q_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.78.self_attn.rotary_emb.inv_freq": "model-00014-of-00014.safetensors", + "model.layers.78.self_attn.v_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.79.input_layernorm.weight": "model-00014-of-00014.safetensors", + "model.layers.79.mlp.down_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.79.mlp.gate_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.79.mlp.up_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.79.post_attention_layernorm.weight": "model-00014-of-00014.safetensors", + "model.layers.79.self_attn.k_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.79.self_attn.o_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.79.self_attn.q_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.79.self_attn.rotary_emb.inv_freq": "model-00014-of-00014.safetensors", + "model.layers.79.self_attn.v_proj.weight": "model-00014-of-00014.safetensors", + "model.layers.8.input_layernorm.weight": "model-00002-of-00014.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00014.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.8.self_attn.rotary_emb.inv_freq": "model-00002-of-00014.safetensors", + "model.layers.8.self_attn.v_proj.weight": 
"model-00002-of-00014.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00014.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00014.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00014.safetensors", + "model.layers.9.self_attn.rotary_emb.inv_freq": "model-00002-of-00014.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00014.safetensors", + "model.norm.weight": "model-00014-of-00014.safetensors" + } +} \ No newline at end of file diff --git a/llama-hf/65B/pytorch_model.bin.index.json b/llama-hf/65B/pytorch_model.bin.index.json new file mode 100644 index 0000000000000000000000000000000000000000..ab700a6d1b9f5b536334b5875b964b145b70eab0 --- /dev/null +++ b/llama-hf/65B/pytorch_model.bin.index.json @@ -0,0 +1,810 @@ +{ + "metadata": { + "total_size": 130571341824 + }, + "weight_map": { + "lm_head.weight": "pytorch_model-00014-of-00014.bin", + "model.embed_tokens.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00014.bin", + "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00014.bin", + "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.10.input_layernorm.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.10.mlp.down_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.10.mlp.up_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.10.self_attn.o_proj.weight": 
"pytorch_model-00002-of-00014.bin", + "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00014.bin", + "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.11.input_layernorm.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.11.mlp.down_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.11.mlp.up_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00014.bin", + "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.12.input_layernorm.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.12.mlp.down_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.12.mlp.up_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00014.bin", + "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.13.input_layernorm.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.13.mlp.down_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.13.mlp.up_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00014.bin", + "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.14.input_layernorm.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.14.mlp.down_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.14.mlp.up_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00014.bin", + "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00003-of-00014.bin", + 
"model.layers.15.input_layernorm.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.15.mlp.down_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.15.mlp.up_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00014.bin", + "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.16.input_layernorm.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.16.mlp.down_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.16.mlp.up_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00014.bin", + "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.17.input_layernorm.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.17.mlp.down_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.17.mlp.up_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00003-of-00014.bin", + "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00003-of-00014.bin", + "model.layers.18.input_layernorm.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.18.mlp.down_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.18.mlp.up_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00014.bin", + "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.19.input_layernorm.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.19.mlp.down_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.19.mlp.up_proj.weight": 
"pytorch_model-00004-of-00014.bin", + "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00014.bin", + "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00014.bin", + "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.20.input_layernorm.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.20.mlp.down_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.20.mlp.up_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00014.bin", + "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.21.input_layernorm.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.21.mlp.down_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.21.mlp.up_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00014.bin", + "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.22.input_layernorm.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.22.mlp.down_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.22.mlp.up_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00004-of-00014.bin", + 
"model.layers.22.self_attn.q_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.22.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00014.bin", + "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.23.input_layernorm.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.23.mlp.down_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.23.mlp.up_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00004-of-00014.bin", + "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00004-of-00014.bin", + "model.layers.24.input_layernorm.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.24.mlp.down_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.24.mlp.up_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00014.bin", + "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.25.input_layernorm.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.25.mlp.down_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.25.mlp.up_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00014.bin", + "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.26.input_layernorm.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.26.mlp.down_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.26.mlp.up_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00014.bin", + "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.27.input_layernorm.weight": 
"pytorch_model-00005-of-00014.bin", + "model.layers.27.mlp.down_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.27.mlp.up_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00014.bin", + "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.28.input_layernorm.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.28.mlp.down_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.28.mlp.up_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00014.bin", + "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.29.input_layernorm.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.29.mlp.down_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.29.mlp.up_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00005-of-00014.bin", + "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00005-of-00014.bin", + "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00014.bin", + "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.30.input_layernorm.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.30.mlp.down_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.30.mlp.up_proj.weight": "pytorch_model-00006-of-00014.bin", + 
"model.layers.30.post_attention_layernorm.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00014.bin", + "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.31.input_layernorm.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.31.mlp.down_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.31.mlp.up_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00014.bin", + "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.32.input_layernorm.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.32.mlp.down_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.32.mlp.gate_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.32.mlp.up_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.32.post_attention_layernorm.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.32.self_attn.k_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.32.self_attn.o_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.32.self_attn.q_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.32.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00014.bin", + "model.layers.32.self_attn.v_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.33.input_layernorm.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.33.mlp.down_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.33.mlp.gate_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.33.mlp.up_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.33.post_attention_layernorm.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.33.self_attn.k_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.33.self_attn.o_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.33.self_attn.q_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.33.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00014.bin", + "model.layers.33.self_attn.v_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.34.input_layernorm.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.34.mlp.down_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.34.mlp.gate_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.34.mlp.up_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.34.post_attention_layernorm.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.34.self_attn.k_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.34.self_attn.o_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.34.self_attn.q_proj.weight": 
"pytorch_model-00006-of-00014.bin", + "model.layers.34.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00014.bin", + "model.layers.34.self_attn.v_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.35.input_layernorm.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.35.mlp.down_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.35.mlp.gate_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.35.mlp.up_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.35.post_attention_layernorm.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.35.self_attn.k_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.35.self_attn.o_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.35.self_attn.q_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.35.self_attn.rotary_emb.inv_freq": "pytorch_model-00006-of-00014.bin", + "model.layers.35.self_attn.v_proj.weight": "pytorch_model-00006-of-00014.bin", + "model.layers.36.input_layernorm.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.36.mlp.down_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.36.mlp.gate_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.36.mlp.up_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.36.post_attention_layernorm.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.36.self_attn.k_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.36.self_attn.o_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.36.self_attn.q_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.36.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00014.bin", + "model.layers.36.self_attn.v_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.37.input_layernorm.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.37.mlp.down_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.37.mlp.gate_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.37.mlp.up_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.37.post_attention_layernorm.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.37.self_attn.k_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.37.self_attn.o_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.37.self_attn.q_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.37.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00014.bin", + "model.layers.37.self_attn.v_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.38.input_layernorm.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.38.mlp.down_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.38.mlp.gate_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.38.mlp.up_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.38.post_attention_layernorm.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.38.self_attn.k_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.38.self_attn.o_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.38.self_attn.q_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.38.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00014.bin", + "model.layers.38.self_attn.v_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.39.input_layernorm.weight": "pytorch_model-00007-of-00014.bin", + 
"model.layers.39.mlp.down_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.39.mlp.gate_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.39.mlp.up_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.39.post_attention_layernorm.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.39.self_attn.k_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.39.self_attn.o_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.39.self_attn.q_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.39.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00014.bin", + "model.layers.39.self_attn.v_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00014.bin", + "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.40.input_layernorm.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.40.mlp.down_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.40.mlp.gate_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.40.mlp.up_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.40.post_attention_layernorm.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.40.self_attn.k_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.40.self_attn.o_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.40.self_attn.q_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.40.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00014.bin", + "model.layers.40.self_attn.v_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.41.input_layernorm.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.41.mlp.down_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.41.mlp.gate_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.41.mlp.up_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.41.post_attention_layernorm.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.41.self_attn.k_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.41.self_attn.o_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.41.self_attn.q_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.41.self_attn.rotary_emb.inv_freq": "pytorch_model-00007-of-00014.bin", + "model.layers.41.self_attn.v_proj.weight": "pytorch_model-00007-of-00014.bin", + "model.layers.42.input_layernorm.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.42.mlp.down_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.42.mlp.gate_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.42.mlp.up_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.42.post_attention_layernorm.weight": 
"pytorch_model-00008-of-00014.bin", + "model.layers.42.self_attn.k_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.42.self_attn.o_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.42.self_attn.q_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.42.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00014.bin", + "model.layers.42.self_attn.v_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.43.input_layernorm.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.43.mlp.down_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.43.mlp.gate_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.43.mlp.up_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.43.post_attention_layernorm.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.43.self_attn.k_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.43.self_attn.o_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.43.self_attn.q_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.43.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00014.bin", + "model.layers.43.self_attn.v_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.44.input_layernorm.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.44.mlp.down_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.44.mlp.gate_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.44.mlp.up_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.44.post_attention_layernorm.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.44.self_attn.k_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.44.self_attn.o_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.44.self_attn.q_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.44.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00014.bin", + "model.layers.44.self_attn.v_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.45.input_layernorm.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.45.mlp.down_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.45.mlp.gate_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.45.mlp.up_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.45.post_attention_layernorm.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.45.self_attn.k_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.45.self_attn.o_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.45.self_attn.q_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.45.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00014.bin", + "model.layers.45.self_attn.v_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.46.input_layernorm.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.46.mlp.down_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.46.mlp.gate_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.46.mlp.up_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.46.post_attention_layernorm.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.46.self_attn.k_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.46.self_attn.o_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.46.self_attn.q_proj.weight": "pytorch_model-00008-of-00014.bin", + 
"model.layers.46.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00014.bin", + "model.layers.46.self_attn.v_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.47.input_layernorm.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.47.mlp.down_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.47.mlp.gate_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.47.mlp.up_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.47.post_attention_layernorm.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.47.self_attn.k_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.47.self_attn.o_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.47.self_attn.q_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.47.self_attn.rotary_emb.inv_freq": "pytorch_model-00008-of-00014.bin", + "model.layers.47.self_attn.v_proj.weight": "pytorch_model-00008-of-00014.bin", + "model.layers.48.input_layernorm.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.48.mlp.down_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.48.mlp.gate_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.48.mlp.up_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.48.post_attention_layernorm.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.48.self_attn.k_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.48.self_attn.o_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.48.self_attn.q_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.48.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00014.bin", + "model.layers.48.self_attn.v_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.49.input_layernorm.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.49.mlp.down_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.49.mlp.gate_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.49.mlp.up_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.49.post_attention_layernorm.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.49.self_attn.k_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.49.self_attn.o_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.49.self_attn.q_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.49.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00014.bin", + "model.layers.49.self_attn.v_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.5.input_layernorm.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.5.mlp.up_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00014.bin", + "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00014.bin", + "model.layers.50.input_layernorm.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.50.mlp.down_proj.weight": 
"pytorch_model-00009-of-00014.bin", + "model.layers.50.mlp.gate_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.50.mlp.up_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.50.post_attention_layernorm.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.50.self_attn.k_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.50.self_attn.o_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.50.self_attn.q_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.50.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00014.bin", + "model.layers.50.self_attn.v_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.51.input_layernorm.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.51.mlp.down_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.51.mlp.gate_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.51.mlp.up_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.51.post_attention_layernorm.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.51.self_attn.k_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.51.self_attn.o_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.51.self_attn.q_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.51.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00014.bin", + "model.layers.51.self_attn.v_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.52.input_layernorm.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.52.mlp.down_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.52.mlp.gate_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.52.mlp.up_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.52.post_attention_layernorm.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.52.self_attn.k_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.52.self_attn.o_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.52.self_attn.q_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.52.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00014.bin", + "model.layers.52.self_attn.v_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.53.input_layernorm.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.53.mlp.down_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.53.mlp.gate_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.53.mlp.up_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.53.post_attention_layernorm.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.53.self_attn.k_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.53.self_attn.o_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.53.self_attn.q_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.53.self_attn.rotary_emb.inv_freq": "pytorch_model-00009-of-00014.bin", + "model.layers.53.self_attn.v_proj.weight": "pytorch_model-00009-of-00014.bin", + "model.layers.54.input_layernorm.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.54.mlp.down_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.54.mlp.gate_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.54.mlp.up_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.54.post_attention_layernorm.weight": "pytorch_model-00010-of-00014.bin", + 
"model.layers.54.self_attn.k_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.54.self_attn.o_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.54.self_attn.q_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.54.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00014.bin", + "model.layers.54.self_attn.v_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.55.input_layernorm.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.55.mlp.down_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.55.mlp.gate_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.55.mlp.up_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.55.post_attention_layernorm.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.55.self_attn.k_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.55.self_attn.o_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.55.self_attn.q_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.55.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00014.bin", + "model.layers.55.self_attn.v_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.56.input_layernorm.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.56.mlp.down_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.56.mlp.gate_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.56.mlp.up_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.56.post_attention_layernorm.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.56.self_attn.k_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.56.self_attn.o_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.56.self_attn.q_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.56.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00014.bin", + "model.layers.56.self_attn.v_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.57.input_layernorm.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.57.mlp.down_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.57.mlp.gate_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.57.mlp.up_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.57.post_attention_layernorm.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.57.self_attn.k_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.57.self_attn.o_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.57.self_attn.q_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.57.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00014.bin", + "model.layers.57.self_attn.v_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.58.input_layernorm.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.58.mlp.down_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.58.mlp.gate_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.58.mlp.up_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.58.post_attention_layernorm.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.58.self_attn.k_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.58.self_attn.o_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.58.self_attn.q_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.58.self_attn.rotary_emb.inv_freq": 
"pytorch_model-00010-of-00014.bin", + "model.layers.58.self_attn.v_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.59.input_layernorm.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.59.mlp.down_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.59.mlp.gate_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.59.mlp.up_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.59.post_attention_layernorm.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.59.self_attn.k_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.59.self_attn.o_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.59.self_attn.q_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.59.self_attn.rotary_emb.inv_freq": "pytorch_model-00010-of-00014.bin", + "model.layers.59.self_attn.v_proj.weight": "pytorch_model-00010-of-00014.bin", + "model.layers.6.input_layernorm.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.6.mlp.down_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.6.mlp.up_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00014.bin", + "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.60.input_layernorm.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.60.mlp.down_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.60.mlp.gate_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.60.mlp.up_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.60.post_attention_layernorm.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.60.self_attn.k_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.60.self_attn.o_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.60.self_attn.q_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.60.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00014.bin", + "model.layers.60.self_attn.v_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.61.input_layernorm.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.61.mlp.down_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.61.mlp.gate_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.61.mlp.up_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.61.post_attention_layernorm.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.61.self_attn.k_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.61.self_attn.o_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.61.self_attn.q_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.61.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00014.bin", + "model.layers.61.self_attn.v_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.62.input_layernorm.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.62.mlp.down_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.62.mlp.gate_proj.weight": 
"pytorch_model-00011-of-00014.bin", + "model.layers.62.mlp.up_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.62.post_attention_layernorm.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.62.self_attn.k_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.62.self_attn.o_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.62.self_attn.q_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.62.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00014.bin", + "model.layers.62.self_attn.v_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.63.input_layernorm.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.63.mlp.down_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.63.mlp.gate_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.63.mlp.up_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.63.post_attention_layernorm.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.63.self_attn.k_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.63.self_attn.o_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.63.self_attn.q_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.63.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00014.bin", + "model.layers.63.self_attn.v_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.64.input_layernorm.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.64.mlp.down_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.64.mlp.gate_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.64.mlp.up_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.64.post_attention_layernorm.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.64.self_attn.k_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.64.self_attn.o_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.64.self_attn.q_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.64.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00014.bin", + "model.layers.64.self_attn.v_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.65.input_layernorm.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.65.mlp.down_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.65.mlp.gate_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.65.mlp.up_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.65.post_attention_layernorm.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.65.self_attn.k_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.65.self_attn.o_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.65.self_attn.q_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.65.self_attn.rotary_emb.inv_freq": "pytorch_model-00011-of-00014.bin", + "model.layers.65.self_attn.v_proj.weight": "pytorch_model-00011-of-00014.bin", + "model.layers.66.input_layernorm.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.66.mlp.down_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.66.mlp.gate_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.66.mlp.up_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.66.post_attention_layernorm.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.66.self_attn.k_proj.weight": "pytorch_model-00012-of-00014.bin", + 
"model.layers.66.self_attn.o_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.66.self_attn.q_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.66.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00014.bin", + "model.layers.66.self_attn.v_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.67.input_layernorm.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.67.mlp.down_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.67.mlp.gate_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.67.mlp.up_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.67.post_attention_layernorm.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.67.self_attn.k_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.67.self_attn.o_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.67.self_attn.q_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.67.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00014.bin", + "model.layers.67.self_attn.v_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.68.input_layernorm.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.68.mlp.down_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.68.mlp.gate_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.68.mlp.up_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.68.post_attention_layernorm.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.68.self_attn.k_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.68.self_attn.o_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.68.self_attn.q_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.68.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00014.bin", + "model.layers.68.self_attn.v_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.69.input_layernorm.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.69.mlp.down_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.69.mlp.gate_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.69.mlp.up_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.69.post_attention_layernorm.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.69.self_attn.k_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.69.self_attn.o_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.69.self_attn.q_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.69.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00014.bin", + "model.layers.69.self_attn.v_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.7.input_layernorm.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.7.mlp.down_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.7.mlp.up_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00014.bin", + "model.layers.7.self_attn.v_proj.weight": 
"pytorch_model-00002-of-00014.bin", + "model.layers.70.input_layernorm.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.70.mlp.down_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.70.mlp.gate_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.70.mlp.up_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.70.post_attention_layernorm.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.70.self_attn.k_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.70.self_attn.o_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.70.self_attn.q_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.70.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00014.bin", + "model.layers.70.self_attn.v_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.71.input_layernorm.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.71.mlp.down_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.71.mlp.gate_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.71.mlp.up_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.71.post_attention_layernorm.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.71.self_attn.k_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.71.self_attn.o_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.71.self_attn.q_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.71.self_attn.rotary_emb.inv_freq": "pytorch_model-00012-of-00014.bin", + "model.layers.71.self_attn.v_proj.weight": "pytorch_model-00012-of-00014.bin", + "model.layers.72.input_layernorm.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.72.mlp.down_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.72.mlp.gate_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.72.mlp.up_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.72.post_attention_layernorm.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.72.self_attn.k_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.72.self_attn.o_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.72.self_attn.q_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.72.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00014.bin", + "model.layers.72.self_attn.v_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.73.input_layernorm.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.73.mlp.down_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.73.mlp.gate_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.73.mlp.up_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.73.post_attention_layernorm.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.73.self_attn.k_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.73.self_attn.o_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.73.self_attn.q_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.73.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00014.bin", + "model.layers.73.self_attn.v_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.74.input_layernorm.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.74.mlp.down_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.74.mlp.gate_proj.weight": "pytorch_model-00013-of-00014.bin", + 
"model.layers.74.mlp.up_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.74.post_attention_layernorm.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.74.self_attn.k_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.74.self_attn.o_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.74.self_attn.q_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.74.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00014.bin", + "model.layers.74.self_attn.v_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.75.input_layernorm.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.75.mlp.down_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.75.mlp.gate_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.75.mlp.up_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.75.post_attention_layernorm.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.75.self_attn.k_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.75.self_attn.o_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.75.self_attn.q_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.75.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00014.bin", + "model.layers.75.self_attn.v_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.76.input_layernorm.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.76.mlp.down_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.76.mlp.gate_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.76.mlp.up_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.76.post_attention_layernorm.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.76.self_attn.k_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.76.self_attn.o_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.76.self_attn.q_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.76.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00014.bin", + "model.layers.76.self_attn.v_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.77.input_layernorm.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.77.mlp.down_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.77.mlp.gate_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.77.mlp.up_proj.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.77.post_attention_layernorm.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.77.self_attn.k_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.77.self_attn.o_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.77.self_attn.q_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.77.self_attn.rotary_emb.inv_freq": "pytorch_model-00013-of-00014.bin", + "model.layers.77.self_attn.v_proj.weight": "pytorch_model-00013-of-00014.bin", + "model.layers.78.input_layernorm.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.78.mlp.down_proj.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.78.mlp.gate_proj.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.78.mlp.up_proj.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.78.post_attention_layernorm.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.78.self_attn.k_proj.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.78.self_attn.o_proj.weight": 
"pytorch_model-00014-of-00014.bin", + "model.layers.78.self_attn.q_proj.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.78.self_attn.rotary_emb.inv_freq": "pytorch_model-00014-of-00014.bin", + "model.layers.78.self_attn.v_proj.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.79.input_layernorm.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.79.mlp.down_proj.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.79.mlp.gate_proj.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.79.mlp.up_proj.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.79.post_attention_layernorm.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.79.self_attn.k_proj.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.79.self_attn.o_proj.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.79.self_attn.q_proj.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.79.self_attn.rotary_emb.inv_freq": "pytorch_model-00014-of-00014.bin", + "model.layers.79.self_attn.v_proj.weight": "pytorch_model-00014-of-00014.bin", + "model.layers.8.input_layernorm.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.8.mlp.down_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.8.mlp.up_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00014.bin", + "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.9.input_layernorm.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.9.mlp.down_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.9.mlp.up_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00014.bin", + "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00002-of-00014.bin", + "model.norm.weight": "pytorch_model-00014-of-00014.bin" + } +} diff --git a/llama-hf/65B/special_tokens_map.json b/llama-hf/65B/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/llama-hf/65B/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/llama-hf/65B/tokenizer.model b/llama-hf/65B/tokenizer.model new file mode 120000 index 0000000000000000000000000000000000000000..16479c4d5ef98c12b179e1557bb3d4039757d83b --- /dev/null +++ b/llama-hf/65B/tokenizer.model @@ -0,0 +1 @@ +../../.git/annex/objects/m9/qj/SHA256E-s499723--9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347/SHA256E-s499723--9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 \ No newline at end of file diff --git 
a/llama-hf/65B/tokenizer_config.json b/llama-hf/65B/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a54b01aa3699f19e1aea416fc337f910f60c6839 --- /dev/null +++ b/llama-hf/65B/tokenizer_config.json @@ -0,0 +1 @@ +{"bos_token": "", "eos_token": "", "model_max_length": 1000000000000000019884624838656, "tokenizer_class": "LlamaTokenizer", "unk_token": ""} \ No newline at end of file diff --git a/llama-hf/7B/config.json b/llama-hf/7B/config.json new file mode 100644 index 0000000000000000000000000000000000000000..ef0484f4fd8c6e39de6de5ef740b3cff866dcfaa --- /dev/null +++ b/llama-hf/7B/config.json @@ -0,0 +1,21 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 11008, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "pad_token_id": 0, + "rms_norm_eps": 1e-06, + "tie_word_embeddings": false, + "torch_dtype": "float16", + "transformers_version": "4.28.0.dev0", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/llama-hf/7B/generation_config.json b/llama-hf/7B/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..55d7b5b6db760f8c1963be3d56a3bc363bacdfb1 --- /dev/null +++ b/llama-hf/7B/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "pad_token_id": 0, + "transformers_version": "4.28.0.dev0" +} diff --git a/llama-hf/7B/model-00001-of-00002.safetensors b/llama-hf/7B/model-00001-of-00002.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..d7612bf6258fd6e9edea6f4782ffc84e30b59550 --- /dev/null +++ b/llama-hf/7B/model-00001-of-00002.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/F1/30/SHA256E-s9976578930--7f15a919bdb38ad81574d399c95053bbf44ae69440a9a29f4d965b691c957b54/SHA256E-s9976578930--7f15a919bdb38ad81574d399c95053bbf44ae69440a9a29f4d965b691c957b54 \ No newline at end of file diff --git a/llama-hf/7B/model-00002-of-00002.safetensors b/llama-hf/7B/model-00002-of-00002.safetensors new file mode 120000 index 0000000000000000000000000000000000000000..5157fad11e700c99a9351b75d94c2c54cce8d2fe --- /dev/null +++ b/llama-hf/7B/model-00002-of-00002.safetensors @@ -0,0 +1 @@ +../../.git/annex/objects/VZ/GW/SHA256E-s3500297344--e162edeaa58f65288ed886f79e89b9f777df435025e58632f821953e35295415/SHA256E-s3500297344--e162edeaa58f65288ed886f79e89b9f777df435025e58632f821953e35295415 \ No newline at end of file diff --git a/llama-hf/7B/model.safetensors.index.json b/llama-hf/7B/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..2787a2c30c32675c1ef50cddc53df20be1d378a2 --- /dev/null +++ b/llama-hf/7B/model.safetensors.index.json @@ -0,0 +1,330 @@ +{ + "metadata": { + "total_size": 13476839424 + }, + "weight_map": { + "lm_head.weight": "model-00002-of-00002.safetensors", + "model.embed_tokens.weight": "model-00001-of-00002.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.12.self_attn.v_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.o_proj.weight": 
"model-00001-of-00002.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.rotary_emb.inv_freq": "model-00002-of-00002.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + 
"model.layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.rotary_emb.inv_freq": "model-00002-of-00002.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.rotary_emb.inv_freq": "model-00002-of-00002.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.rotary_emb.inv_freq": "model-00002-of-00002.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.rotary_emb.inv_freq": "model-00002-of-00002.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.mlp.up_proj.weight": 
"model-00002-of-00002.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.rotary_emb.inv_freq": "model-00002-of-00002.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.rotary_emb.inv_freq": "model-00002-of-00002.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.rotary_emb.inv_freq": "model-00002-of-00002.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + 
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors", + 
"model.layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.rotary_emb.inv_freq": "model-00001-of-00002.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors", + "model.norm.weight": "model-00002-of-00002.safetensors" + } +} \ No newline at end of file diff --git a/llama-hf/7B/pytorch_model.bin.index.json b/llama-hf/7B/pytorch_model.bin.index.json new file mode 100644 index 0000000000000000000000000000000000000000..db7264b24cac7a39947bb5fc02fe5c2d7ac9eaf4 --- /dev/null +++ b/llama-hf/7B/pytorch_model.bin.index.json @@ -0,0 +1,330 @@ +{ + "metadata": { + "total_size": 13476839424 + }, + "weight_map": { + "lm_head.weight": "pytorch_model-00002-of-00002.bin", + "model.embed_tokens.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.0.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.1.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + 
"model.layers.10.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.11.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.12.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.13.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.14.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.mlp.down_proj.weight": 
"pytorch_model-00001-of-00002.bin", + "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.15.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.16.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.17.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.18.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + 
"model.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.19.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.2.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.20.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.21.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.22.self_attn.rotary_emb.inv_freq": 
"pytorch_model-00001-of-00002.bin", + "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.23.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.24.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.24.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.25.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.26.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + 
"model.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.27.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.28.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.29.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.3.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.30.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.self_attn.k_proj.weight": 
"pytorch_model-00002-of-00002.bin", + "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.30.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.31.self_attn.rotary_emb.inv_freq": "pytorch_model-00002-of-00002.bin", + "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", + "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.4.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.5.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.6.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.6.self_attn.v_proj.weight": 
"pytorch_model-00001-of-00002.bin", + "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.7.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.8.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.layers.9.self_attn.rotary_emb.inv_freq": "pytorch_model-00001-of-00002.bin", + "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", + "model.norm.weight": "pytorch_model-00002-of-00002.bin" + } +} diff --git a/llama-hf/7B/special_tokens_map.json b/llama-hf/7B/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..9e26dfeeb6e641a33dae4961196235bdb965b21b --- /dev/null +++ b/llama-hf/7B/special_tokens_map.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/llama-hf/7B/tokenizer.model b/llama-hf/7B/tokenizer.model new file mode 120000 index 0000000000000000000000000000000000000000..16479c4d5ef98c12b179e1557bb3d4039757d83b --- /dev/null +++ b/llama-hf/7B/tokenizer.model @@ -0,0 +1 @@ +../../.git/annex/objects/m9/qj/SHA256E-s499723--9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347/SHA256E-s499723--9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 \ No newline at end of file diff --git a/llama-hf/7B/tokenizer_config.json b/llama-hf/7B/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a54b01aa3699f19e1aea416fc337f910f60c6839 --- /dev/null +++ b/llama-hf/7B/tokenizer_config.json @@ -0,0 +1 @@ +{"bos_token": "", 
"eos_token": "", "model_max_length": 1000000000000000019884624838656, "tokenizer_class": "LlamaTokenizer", "unk_token": ""} \ No newline at end of file