diff --git a/config.json b/config.json index de132a80b25c9d12c674dbd84cedc1d6f35fb756..4855aab4a7cb74273566f063b7c6646b434ab716 100644 --- a/config.json +++ b/config.json @@ -1,4 +1,5 @@ { + "_name_or_path": "./dpo_output/current-finetuned-model_run_3", "architectures": [ "MixtralForCausalLM" ], @@ -20,10 +21,11 @@ "rms_norm_eps": 1e-05, "rope_theta": 1000000.0, "router_aux_loss_coef": 0.02, + "router_jitter_noise": 0.0, "sliding_window": null, "tie_word_embeddings": false, - "torch_dtype": "bfloat16", - "transformers_version": "4.36.0.dev0", + "torch_dtype": "float16", + "transformers_version": "4.46.3", "use_cache": true, "vocab_size": 32000 } diff --git a/generation_config.json b/generation_config.json index c441554e91bd20deec0e4c45388b2ac2cc602f2b..36d1470f8a8e399205e75e1f5016669d6da3c5e6 100644 --- a/generation_config.json +++ b/generation_config.json @@ -2,5 +2,5 @@ "_from_model_config": true, "bos_token_id": 1, "eos_token_id": 2, - "transformers_version": "4.36.0.dev0" + "transformers_version": "4.46.3" } diff --git a/model-00001-of-00048.safetensors b/model-00001-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa07b9516ff5a7de334acb9070183d99707de43d --- /dev/null +++ b/model-00001-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:feb719023821e172fe77931fc3c2cf9b45095586f710543bef1d6040f8cd82a8 +size 1990265312 diff --git a/model-00002-of-00048.safetensors b/model-00002-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0b83f2fcee9e2a26b8dd3a8b4eef05242508f0b6 --- /dev/null +++ b/model-00002-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:adf296b59faf4d0c8e53889dac0f98fc3e3ed3bb44f3aca8da58a525c60fb189 +size 1963019104 diff --git a/model-00003-of-00048.safetensors b/model-00003-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11b79bda357c4c867e274db94b50bb3fe02c040b --- /dev/null +++ b/model-00003-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf1802a3f1400ec3d64488d527351e0444bd9c9615de15cb3bfeb58f78eeee92 +size 1996490936 diff --git a/model-00004-of-00048.safetensors b/model-00004-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..74dbf7c4ad3e24869978df7a8f5b0d47fbcbc396 --- /dev/null +++ b/model-00004-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d3383358b4cf5552251b7f36ce97c21b4dc72e89a9d1b94398916760b7589f6 +size 1963019096 diff --git a/model-00005-of-00048.safetensors b/model-00005-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..477567cb210c6724061828e4f61fe762347c0ce9 --- /dev/null +++ b/model-00005-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b440f206c570fe9c434fcd1dbbc31975e639a98f69b5ce10b76855169cbc6bd +size 1963019104 diff --git a/model-00006-of-00048.safetensors b/model-00006-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c9af080eff997b81d1aad884322c4a8fe7544c4 --- /dev/null +++ b/model-00006-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef7cf8f6a0494b176293389f2041d0dc16660836695deb37a70512f05e12aed1 +size 1996507544 diff --git a/model-00007-of-00048.safetensors b/model-00007-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..728065d535b25f60c29689acd0a536cb5326e39e --- /dev/null +++ b/model-00007-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08030cbdabadd9dd3db16b84d128156e1c6c2a7ed8157a6e80daca5a0ffdbdcb +size 1963002488 diff --git a/model-00008-of-00048.safetensors b/model-00008-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..629f1076cc2a36f503ec7a1f3543fbee69d27347 --- /dev/null +++ b/model-00008-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68eba6958bb48316665ae46e13c2b5337655fef2d1a82dd18a22c905535e0d79 +size 1963019096 diff --git a/model-00009-of-00048.safetensors b/model-00009-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43bb8ee0deb7d9ec5e6df79477a79bbf63c317dd --- /dev/null +++ b/model-00009-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae6441dbba66cfaaae74a5a9e477bde8c640b9268bbf36ccaf0221514c2774b4 +size 1963019104 diff --git a/model-00010-of-00048.safetensors b/model-00010-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6182f96a6fbf3a300f2869903abd1ef255bd6585 --- /dev/null +++ b/model-00010-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1f833617ca3e05135417bc8af1d4e1e9a5a207019d5e158a9235c5a41fbe378 +size 1996490936 diff --git a/model-00011-of-00048.safetensors b/model-00011-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f07a0c77ad1f80584d8dde4de7ee88eb8667142 --- /dev/null +++ b/model-00011-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f380dd48f7a66fef422c6985dd7b0bd4d5e63234a02d01b6f3ca7ee12fc478f0 +size 1963019096 diff --git a/model-00012-of-00048.safetensors b/model-00012-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..76ee6deef653ca8f4c27a08bf45de6dec38ec662 --- /dev/null +++ b/model-00012-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78b77ee8f2ab77e2d47378cc4508c3a229e4f3ddb6598c2d525797ec20512511 +size 1963019104 diff --git a/model-00013-of-00048.safetensors b/model-00013-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..51023ac6c0d153dd11bb9032a215ed1a62cdea66 --- /dev/null +++ b/model-00013-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61d67a054422175d293380fa978f9db5a2f49c8e8461bceb74623f6de6f9b577 +size 1996490936 diff --git a/model-00014-of-00048.safetensors b/model-00014-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4880e9a28d7e8a9090dfc832dc38bbfa8a3ada50 --- /dev/null +++ b/model-00014-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:149decbaca6bdac9b3abe75423e6380856d2035acc212117fd2bfb464acbfa28 +size 1963019096 diff --git a/model-00015-of-00048.safetensors b/model-00015-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..952ecf79930e441f5ef7e3a3fe8050a33e44beac --- /dev/null +++ b/model-00015-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11c287782276e608952c91b71f1830da3d2cef08234a313fafc871b8535fb842 +size 1963019104 diff --git a/model-00016-of-00048.safetensors b/model-00016-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94381769874fe32407bf10f90da9b0c9c5be83ca --- /dev/null +++ b/model-00016-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:840813c1f23cd529cfa8e39ebd1bbf5578964fb853f280f7c477d7f911b31ff7 +size 1996490952 diff --git a/model-00017-of-00048.safetensors b/model-00017-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bf278eb5676bb4d77355bed44294a95d12504267 --- /dev/null +++ b/model-00017-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc6249a4fc240f53b7f3c2dfc3a863a6e002c8a73e9472895f2b79b9762cf113 +size 1963019120 diff --git a/model-00018-of-00048.safetensors b/model-00018-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4a3d9f555e922f89a08c2ed39a006c4ef4c0f03 --- /dev/null +++ b/model-00018-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d0afe4c3afc0e274c55bd50a9dd06e732e26251f6617fd399a998f975ca7ee53 +size 1963019128 diff --git a/model-00019-of-00048.safetensors b/model-00019-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90d8e57286bfa378fde557535379a232a6fb4e19 --- /dev/null +++ b/model-00019-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:78065d429d815fa8dc871ee9c490a5634364be643b3624c514c857eb4c1a8142 +size 1996490952 diff --git a/model-00020-of-00048.safetensors b/model-00020-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e1fab3cfdaaa96447cc3d0614a8a59119e02038 --- /dev/null +++ b/model-00020-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aaa34e072e18bbd2592740805a60c148f359fb26b02f7c88514bdeeac5827485 +size 1963019120 diff --git a/model-00021-of-00048.safetensors b/model-00021-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d12fd4928448409e1396d46d98d30fa8b3b056ef --- /dev/null +++ b/model-00021-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0dff03ddd8698feaac0786457efe5a1a398d25d8e468562a5235f07181ca33fc +size 1963019128 diff --git a/model-00022-of-00048.safetensors b/model-00022-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd36b2bbb768839841f5ea10de5ad681df4a6fc5 --- /dev/null +++ b/model-00022-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bc5a604692d778e25302999640d1feebb2877d2518124203bb4c5ee88e0d5ca4 +size 1996490952 diff --git a/model-00023-of-00048.safetensors b/model-00023-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6bbcf610673f3db806f5f7c26308431bcfb8f850 --- /dev/null +++ b/model-00023-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:378fd4aab0ef8bee4cc5819c6c330b03a485b01b77b80f3300f1cee447ce2371 +size 1963019120 diff --git a/model-00024-of-00048.safetensors b/model-00024-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4a618bd6d1c67fbee01c8b76201d638bcc6b9dbd --- /dev/null +++ b/model-00024-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af576db4d79f04350b35b488d2ceddf82df9137c233ca9f78a0af42c93f03477 +size 1963019128 diff --git a/model-00025-of-00048.safetensors b/model-00025-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ee68ba57c4bab5376261689325038242014c9b0 --- /dev/null +++ b/model-00025-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb4da379a85458e81ae2802ab200b76c5b0a1449f49eba55aeb9c92dcec26b69 +size 1996490952 diff --git a/model-00026-of-00048.safetensors b/model-00026-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8936da23402978f3513d13a55a26314dc6ab6073 --- /dev/null +++ b/model-00026-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3979feb295e71e8d90b89954e6786630541d9c3ec307eb0fa34f00b2515e40fa +size 1963019120 diff --git a/model-00027-of-00048.safetensors b/model-00027-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9bf020e989b2b298cabd415517be6a9f052b4c80 --- /dev/null +++ b/model-00027-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed12b8f97e3966ea33b24aab33ad1a6112fcbe8679d727bcac7646a302a2bd85 +size 1963019128 diff --git a/model-00028-of-00048.safetensors b/model-00028-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b28613314a1098a1e6b752848ce5a5cc89844e8 --- /dev/null +++ b/model-00028-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b2f104de711c7453ec15304c2dc33b13cd1835fca1106387a5c5c182b0e18cd +size 1996490952 diff --git a/model-00029-of-00048.safetensors b/model-00029-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4f14e1fc58b76645902c8bec8746328a5262a993 --- /dev/null +++ b/model-00029-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e758ddda5761052e3c23990d735f7fe62f7fd18d27a8c4d3aefdf7a7817e3ecf +size 1963019120 diff --git a/model-00030-of-00048.safetensors b/model-00030-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3afedbcb74d7006274dcc1fe06f2e43d7b2b37be --- /dev/null +++ b/model-00030-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2068d388dedf90847ffaae7ae84f0cbd5eedd4145b7ad13200f764b6930e22c +size 1963019128 diff --git a/model-00031-of-00048.safetensors b/model-00031-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..142db4f557ef60f0112c4a95e51b61e704be596d --- /dev/null +++ b/model-00031-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7d351d77228974c8c1c59219f0de34c1031b245a17aa2753d01a185b7dfe122 +size 1996507568 diff --git a/model-00032-of-00048.safetensors b/model-00032-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ad0d43b8065eec047a049ee970a97a8080761349 --- /dev/null +++ b/model-00032-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32ddec6457bab7746960298f03951982b29a928c9325eeaf2524a0718c6ba823 +size 1963002512 diff --git a/model-00033-of-00048.safetensors b/model-00033-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd049df68f7efaa0f35f12a749f9058deb0cff5b --- /dev/null +++ b/model-00033-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bd5782125a0aa13c7537e643b4f147e3cac8238e97e0bdae2a88ca38b4c9a99 +size 1963019120 diff --git a/model-00034-of-00048.safetensors b/model-00034-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c952fdedc724cc8bd74d3de8aaf8b1fee4caef66 --- /dev/null +++ b/model-00034-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eb7acf34db7ac038304541d117653749098c8d5a7c6ff8a40b047913e1103e00 +size 1963019128 diff --git a/model-00035-of-00048.safetensors b/model-00035-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34d4313638d97c41f124d95cdf2d4af0afa46da6 --- /dev/null +++ b/model-00035-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5e14c1a6d8d9ad4aaeac31624f8578c6ecd921051346452113add32acb03202 +size 1996490952 diff --git a/model-00036-of-00048.safetensors b/model-00036-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3e7b36ee369a39737f385b287de2452fb5312aeb --- /dev/null +++ b/model-00036-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55f3dca68d64588e82543a930ccd4b2e429ea14bccf84ba37875edbe8a9f747c +size 1963019120 diff --git a/model-00037-of-00048.safetensors b/model-00037-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e130222a0ea00fd99b8412b8d26cb1f8b7d541f7 --- /dev/null +++ b/model-00037-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b2a79efc9bf145d7d773936ab0903f8825e66296773810fb93b0f1ce259c619 +size 1963019128 diff --git a/model-00038-of-00048.safetensors b/model-00038-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a22d7a4b442a9e8910740b965db22eba06fb0a36 --- /dev/null +++ b/model-00038-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:106ac8363e050ceb404e5b1b952c5f3f5ef301d5e1dcaeea6a6e2a665949c5ff +size 1996490952 diff --git a/model-00039-of-00048.safetensors b/model-00039-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30ad18556b17e27841dc942c9f441bb6af516b2a --- /dev/null +++ b/model-00039-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7960ee2cc2aac5ae348dfe3e0a87e6870daf00064d8edaa841e1481821742d6 +size 1963019120 diff --git a/model-00040-of-00048.safetensors b/model-00040-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec8f40e0c43d7e71e98c7aa3ac51befefbfa238a --- /dev/null +++ b/model-00040-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f882c4f02a50586501a81f02ef883980faa3cffc9350854264347e81611b6930 +size 1963019128 diff --git a/model-00041-of-00048.safetensors b/model-00041-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ebd8c4b40129ea76a31cad314f7b90ec074ac90c --- /dev/null +++ b/model-00041-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3947e4d3f5fdca6fec49912a79d4974a1e3ebcee2e25f4432b72ba9af5eeb723 +size 1996490952 diff --git a/model-00042-of-00048.safetensors b/model-00042-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..36b241a178881f9ad1e3fcd5b37322674317f6e8 --- /dev/null +++ b/model-00042-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1991f6bc563c64355da98abdd06293786ffb55f876295b8b8047f741704edc5a +size 1963019120 diff --git a/model-00043-of-00048.safetensors b/model-00043-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5477028303aa3dbef86070ba0b137dcac2eea8b2 --- /dev/null +++ b/model-00043-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfdb65c494099290f33359bd3cb5d3738e897e6bbfc5ea729dfe60657bafe312 +size 1963019128 diff --git a/model-00044-of-00048.safetensors b/model-00044-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67d9fe322795cddb32e840c2e593544626b5bd60 --- /dev/null +++ b/model-00044-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0797690eb32e46142981dcbd2e7cc846fe2cca05ab550ac11288c0f0881112dc +size 1996490952 diff --git a/model-00045-of-00048.safetensors b/model-00045-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..37a96774d11e44d1c1fbd08a163f974e4ecc0524 --- /dev/null +++ b/model-00045-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b31016919d9401a2ec85f046cd3fb5d53de1cc4d68580c95f1690eb9a5a9f9 +size 1963019120 diff --git a/model-00046-of-00048.safetensors b/model-00046-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e5058a5e0f9cb085a0d4290f681637c477f9392 --- /dev/null +++ b/model-00046-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4813f2708f504871068afb86dbc0287815c8fff7fab7350df27a7842e465b83e +size 1963019128 diff --git a/model-00047-of-00048.safetensors b/model-00047-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..827c4102c28969e3fd50f134868121dbde3da24b --- /dev/null +++ b/model-00047-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7aa7a53968446315499f5b39a49535fd3a4c56c9e371544e227c32ef29ac85e0 +size 1996490952 diff --git a/model-00048-of-00048.safetensors b/model-00048-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c988af13631919419bb6890875619d2c643e4ebb --- /dev/null +++ b/model-00048-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cf655b1361dd6bd5e750871412a209b72574e062874dc9c2488812d97b4996b +size 614490944 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..8dae7bb1b27dd987eff4de99e6911daaa769f4f4 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1002 @@ +{ + "metadata": { + "total_size": 93405585408 + }, + "weight_map": { + "lm_head.weight": "model-00048-of-00048.safetensors", + "model.embed_tokens.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00048.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00048.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00048.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00002-of-00048.safetensors", + "model.layers.1.input_layernorm.weight": "model-00004-of-00048.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00004-of-00048.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00015-of-00048.safetensors", + "model.layers.10.input_layernorm.weight": "model-00017-of-00048.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00017-of-00048.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00017-of-00048.safetensors", + "model.layers.11.input_layernorm.weight": "model-00018-of-00048.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00018-of-00048.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00018-of-00048.safetensors", + "model.layers.12.input_layernorm.weight": "model-00020-of-00048.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00020-of-00048.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00020-of-00048.safetensors", + "model.layers.13.input_layernorm.weight": "model-00021-of-00048.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00021-of-00048.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00021-of-00048.safetensors", + "model.layers.14.input_layernorm.weight": "model-00023-of-00048.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00023-of-00048.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00023-of-00048.safetensors", + "model.layers.15.input_layernorm.weight": "model-00024-of-00048.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00024-of-00048.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00024-of-00048.safetensors", + "model.layers.16.input_layernorm.weight": "model-00026-of-00048.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00026-of-00048.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00026-of-00048.safetensors", + "model.layers.17.input_layernorm.weight": "model-00027-of-00048.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00027-of-00048.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00027-of-00048.safetensors", + "model.layers.18.input_layernorm.weight": "model-00029-of-00048.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00029-of-00048.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00029-of-00048.safetensors", + "model.layers.19.input_layernorm.weight": "model-00030-of-00048.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00030-of-00048.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00004-of-00048.safetensors", + "model.layers.2.input_layernorm.weight": "model-00005-of-00048.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00005-of-00048.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00030-of-00048.safetensors", + "model.layers.20.input_layernorm.weight": "model-00031-of-00048.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00031-of-00048.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00032-of-00048.safetensors", + "model.layers.21.input_layernorm.weight": "model-00033-of-00048.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00033-of-00048.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00033-of-00048.safetensors", + "model.layers.22.input_layernorm.weight": "model-00034-of-00048.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00034-of-00048.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00034-of-00048.safetensors", + "model.layers.23.input_layernorm.weight": "model-00036-of-00048.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00036-of-00048.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00036-of-00048.safetensors", + "model.layers.24.input_layernorm.weight": "model-00037-of-00048.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00037-of-00048.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00037-of-00048.safetensors", + "model.layers.25.input_layernorm.weight": "model-00039-of-00048.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00039-of-00048.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00039-of-00048.safetensors", + "model.layers.26.input_layernorm.weight": "model-00040-of-00048.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00040-of-00048.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00040-of-00048.safetensors", + "model.layers.27.input_layernorm.weight": "model-00042-of-00048.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00042-of-00048.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00042-of-00048.safetensors", + "model.layers.28.input_layernorm.weight": "model-00043-of-00048.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00043-of-00048.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00043-of-00048.safetensors", + "model.layers.29.input_layernorm.weight": "model-00045-of-00048.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00045-of-00048.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00005-of-00048.safetensors", + "model.layers.3.input_layernorm.weight": "model-00006-of-00048.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00006-of-00048.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00045-of-00048.safetensors", + "model.layers.30.input_layernorm.weight": "model-00046-of-00048.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00046-of-00048.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00048-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00048-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00048-of-00048.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00046-of-00048.safetensors", + "model.layers.31.input_layernorm.weight": "model-00048-of-00048.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00048-of-00048.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00007-of-00048.safetensors", + "model.layers.4.input_layernorm.weight": "model-00008-of-00048.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00008-of-00048.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00008-of-00048.safetensors", + "model.layers.5.input_layernorm.weight": "model-00009-of-00048.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00009-of-00048.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00009-of-00048.safetensors", + "model.layers.6.input_layernorm.weight": "model-00011-of-00048.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00011-of-00048.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00011-of-00048.safetensors", + "model.layers.7.input_layernorm.weight": "model-00012-of-00048.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00012-of-00048.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00012-of-00048.safetensors", + "model.layers.8.input_layernorm.weight": "model-00014-of-00048.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00014-of-00048.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00014-of-00048.safetensors", + "model.layers.9.input_layernorm.weight": "model-00015-of-00048.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00015-of-00048.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00014-of-00048.safetensors", + "model.norm.weight": "model-00048-of-00048.safetensors" + } +}