diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,9583 @@ +{ + "metadata": { + "ParamSize": 709, + "ParamBytes": 16895535104.0, + "BitsPerParam": 3.0067237203635373 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 640, + 152064 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "d38afcd82306594fb1f01a676a89254a" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.63.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "acda2786be9673b2ce87cc57b01b0b3a" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.63.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "286b3c85da12e65f0ee32f05ce67d90a" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 389283840, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 152064, + 640 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 389283840, + "byteOffset": 0 + } + ], + "md5sum": "81dc4cbe141d73286c525c6e0b4e743f" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fe099c1452569ffed71b77209d807869" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0fa095862c96029d652a6544226ba2fd" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 33218560, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 40, + 152064 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12165120, + "byteOffset": 0 + }, + { + "name": "model.layers.63.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 12165120 + }, + { + "name": "model.layers.63.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 12175360 + }, + { + "name": "model.layers.63.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 14387200 + }, + { + "name": "model.layers.63.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 18810880 + }, + { + "name": "model.norm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 18821120 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 152064, + 40 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 12165120, + "byteOffset": 18831360 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 30996480 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 31006720 + } + ], + "md5sum": "f3e94aee5a0a9351986e810eeb994fbf" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 23371776, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 4423680 + }, + { + "name": "model.layers.0.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 4433920 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 4448256 + }, + { + "name": "model.layers.0.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 22798336 + } + ], + "md5sum": "f0bc07957ca3f7a68ff114f776a39015" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4f9958bfcd06555dda3c46be941d84e5" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "7e71867a4770b3d00e5f5a3b91f58abf" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.1.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ce44e1852c6418f78e0fde0a02cc86e5" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.1.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.1.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "63441832b665917822906599b0f95fc7" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "941b2a8252cb52bf66573bee4925f0fa" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f3cb59a00b241c7b23eb48de535b2b6d" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.2.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "cec53c6ad9acea483afcfdced0820174" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.2.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.2.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "1a9a57369e0443d1e0480ab0905b0e9f" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0d23d4ff5e31847925164d915082a354" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.3.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c6e99c735d7311629717ed7fe1a62485" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "18b7c72b941ae836d87325e3d3dce0f9" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.3.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.3.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "210d4410e6d5b824ef1e387341029b69" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a89fc9065e98bea11eeea09d3a308ad4" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 25583616, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 2211840 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 6635520 + }, + { + "name": "model.layers.10.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 6645760 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 6660096 + }, + { + "name": "model.layers.10.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 25010176 + } + ], + "md5sum": "cdd101ec02016ec8cf06f783f98c845c" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "daaf26be908e1987efa322ffab03e411" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a92db6cf1bfe65d8f77eb68704265bf1" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.11.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0fb776f9efe40ab67c8ec798720fae93" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.11.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.11.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "e5cf770e64f35dee919bbf07a5400f0f" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a46a68cb997d73d33976ae1fa745bf70" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "e216b21f019ce9f475ae041d4222154d" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.12.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "eb133702f79a91e6122cbeefecc60d52" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.12.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.12.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "c9301b3da030f3a87d1495bdf363c9ca" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "741e42ec2b78543024a35b6848fe626f" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.13.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "cb7faa5bef5980a8acf79e358f62c0ef" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "80e295345d0def9ec043929541f7daa0" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.13.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.13.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "ee9d728754a1d0de322c7b5d6469ec97" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "733251277da8b087c2d205c53294aa71" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "1a0cbc61c9b08bda14d2be90d99f8a1c" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "bc19090272fbdfd9c47bd831a37337db" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 32239616, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 2211840 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 6635520 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 6645760 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 6656000 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 8867840 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13291520 + }, + { + "name": "model.layers.9.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 13301760 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 13316096 + }, + { + "name": "model.layers.9.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 31666176 + } + ], + "md5sum": "2629706adb26bba64d78ed0808542722" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "636277e1434de5495a37ea8664cec4c0" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "422e3efe4e824ba307c5726184472900" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "2328983881ef89c1b28b039ad4115415" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.14.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "cc753dc33b62d262bd3588d5de840ca2" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 22992896, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15738880 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15749120 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 15759360 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 17971200 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22394880 + }, + { + "name": "model.layers.14.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 22405120 + }, + { + "name": "model.layers.14.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 22419456 + } + ], + "md5sum": "b397fa17be6ef07283bb9ac298c9e5b5" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "154524cd66ae525a1d91d525dc50b6a9" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "847e94bedfe4c912f150eef0391a78a3" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.15.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "647616f7600b3e7ffdbbb1cd067f6c05" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.15.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.15.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "b18ed4c7231280c8f5b52c18b35ea156" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9a5f77a02320c08bbcd6c5d4323eece9" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1ffb30c6cf0f1d0d7847b0d6ebad67fb" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.16.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ccd52a7403ea6b9560c6790b193a5fd8" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.16.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.16.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "a6d26d4cf16fc3bc79b608c3463c008b" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "33dc10f5d1633b6bb082e0ce6ba131c3" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c83345ad81185ef063449ed83fbf9b6a" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.17.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7c24b93f27c00d6159cab396490d5e65" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.17.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.17.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "15c960e33cb79848fff6d97a74740b0e" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "52a5d0f98ccb3cb1743b47fb79232523" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.18.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "4610d2765f664fff473eaf583be37aff" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "426bf6b4e6c2631a69d2f8e24c1a1787" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.18.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.18.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "1b7a844c3de9613ac2443a2738a53bea" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "677d62edf49ac06f58b0d174d1e17f3f" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f8134125de28134e70a7d0a9f2880802" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.19.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.19.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "94e6cbe67bbf012d73ae4de165517256" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "1f65b8b6fa62b30f4b8e6dce2c269e66" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "770d8a922a07d50e4b339f9ee62037b2" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.20.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "677fc54a00f9ae9d013ed8e7a3c713d6" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.20.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.20.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "77490fcad3c02b98c8de9e07c8c86428" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "849aa66abaf5f801618b909b39dc17e3" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "11d552441b44fa44728f0fd42e96943c" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.21.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "448e7ace26ca598a089ef057acd19aeb" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.21.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.21.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "482ab1979606f8a43128ad7ea32c51a0" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "9f5b7a80a212d44b165298aa40cfac64" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "adfb6ad05b75eba1801f698e563d295f" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.22.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "dbfe90845e977f2afccb8d49c5fe4933" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.22.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.22.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "bcbba6736037326185d700333a71b9aa" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "516e0c7a334a9dddea8560b123806cb4" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.23.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ae1837eba24c0495b29faecd5c7dab2d" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e06366e02818d75ab98bc30381044420" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.23.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.23.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "9f000010d5f38bcd90af3dc8105c155f" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "06bc6d2cbeaecf69be3369191edfe7fe" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c33b443ba6e80b66426197b9f6550fcf" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.24.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.24.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "3e526ce2b82bc44a5c4e720f4defd580" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "bd7e7adb49b9f262d912c3058d9c12ed" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9520255eccd73f0c3f43573fcc153282" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.25.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8b18ba064e36238fc01532290a91f320" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.25.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.25.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "ad5ded6836dd1ba17ce5b723f24cd4ec" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "991341efead3379fae44a1cd9a0acc99" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "cbe6060e6d0fe938ad48411981ffe8e3" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.26.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "a8662732d04af5b3bd8602843528fb06" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.26.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.26.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "9995a2b6cb2b75e958552aae2ddb3d50" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "2df0929609b159ef9dd26832579e479e" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "72be7d5648737c7d0be5283d459d87b5" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.27.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "da13409c49de26541251219412c7da4e" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.27.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.27.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "7f0ec36f874ed086a837d41fec1bde86" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "3a18ef04802c8757a446cf950c89bce5" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.28.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "4fb8ab3a1a1d87c82ebfd48595dd267d" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b4071765ae236cea87a86c0455189c45" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.28.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.28.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "ec752b3269c9cf467ab3c05156d7ce1b" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a41f14e0624d4aa5b7c34cd474cfd2e7" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a563b37cbd71f44f06298f03340cab77" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.29.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.29.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "55c45ec249107bdbfbbad815cb7e8036" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "8a8f2ddddde399ae9192f18fd4547b6b" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "95e56040860d3b8dbede826b22cec795" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.30.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "abb787ba6e46a7417bc8221c6af3a7c8" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.30.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.30.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "4dfb1be23a7187129685c8af34f3ca25" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "35e37f980205859775df79791147e510" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "be20f0ee5de20481a0068da91e59f364" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.31.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "eed7b3feb3cb029c1d8262139807f0d3" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.31.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.31.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "99a0476844e6fec86ba0d1e75d124b00" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.32.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d3ba487355af5dc8a00e394a39fdc3f9" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.32.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "852a5d447fdd4d9b867fbc58611f5d9f" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.32.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "337beadf3f290e087fe704e8cef73416" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.32.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.32.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.32.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.32.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.32.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.32.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "8f3db0e01f28e6ce65a4fbbb84e743e1" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.33.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "51270eac69cc1548b7f34ffbf3ccedfb" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.33.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3dda21b38ed5f95dcaff807890daed33" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d166efd6e663862326dd0a9df9fd0959" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.32.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.32.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.33.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.33.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.33.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.33.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "06d973ceb3519d20e81be851298623bb" + }, + { + "dataPath": "params_shard_116.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4783d3c077f98ce834f1547e5fc72a1a" + }, + { + "dataPath": "params_shard_117.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "1ec2794648fdb0ca40ade1eac403f756" + }, + { + "dataPath": "params_shard_118.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.4.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.4.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "1e38a22979e2eeba9e7d34599e14dfc4" + }, + { + "dataPath": "params_shard_119.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a92eb6917515ec0905f153e48353e2cd" + }, + { + "dataPath": "params_shard_120.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ddfb497e4cce5c634c23226c03788991" + }, + { + "dataPath": "params_shard_121.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.5.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "55f78c97208f04a1b6b97a087526e41f" + }, + { + "dataPath": "params_shard_122.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.5.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.5.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "41670981366657a70745a28b70fb456f" + }, + { + "dataPath": "params_shard_123.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "5fcb8b7c316f58a3c3620aec2b0a8604" + }, + { + "dataPath": "params_shard_124.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "46b4d518c1970e3243605aa274c3e24a" + }, + { + "dataPath": "params_shard_125.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.6.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "071d970eccffb4c22e48cc4ab86e2388" + }, + { + "dataPath": "params_shard_126.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.6.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.6.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "12a5096bccc3abe702f02f6ecf175307" + }, + { + "dataPath": "params_shard_127.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "15dba97d9d6b228fc2d4998083ceed8b" + }, + { + "dataPath": "params_shard_128.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "fa16ebbd0546c571288c3557bc734218" + }, + { + "dataPath": "params_shard_129.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.7.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "58100f3a7d802201c6e3b3c4681cb754" + }, + { + "dataPath": "params_shard_130.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.7.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.7.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "965e2e2f755c12ae34dadcae356a1b0a" + }, + { + "dataPath": "params_shard_131.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.8.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 13516800 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 13531136 + }, + { + "name": "model.layers.8.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 31881216 + } + ], + "md5sum": "c5f8480542a3eb2b0c05c1a3d31c8746" + }, + { + "dataPath": "params_shard_132.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.33.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "28c8f3ae7171167efcf2b7ccc8677a90" + }, + { + "dataPath": "params_shard_133.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.34.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "fff2b8442e6f3978843ce5d570cb8eb5" + }, + { + "dataPath": "params_shard_134.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.34.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "73f0f2d74d782631ffcaed993e7e5954" + }, + { + "dataPath": "params_shard_135.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.34.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "54af7f72f723c8c92dfb71182950e7e5" + }, + { + "dataPath": "params_shard_136.bin", + "format": "raw-shard", + "nbytes": 22992896, + "records": [ + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.33.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.33.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.33.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15738880 + }, + { + "name": "model.layers.34.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 15749120 + }, + { + "name": "model.layers.34.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 15759360 + }, + { + "name": "model.layers.34.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 17971200 + }, + { + "name": "model.layers.34.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 22394880 + }, + { + "name": "model.layers.34.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 22405120 + }, + { + "name": "model.layers.34.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 22419456 + } + ], + "md5sum": "aabba2f6eec353a44d5448534a5921fd" + }, + { + "dataPath": "params_shard_137.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.35.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "cf4274cfc9a0b1d38715e7ff874c6604" + }, + { + "dataPath": "params_shard_138.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.35.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9a55553b13b282e32264b057414ab44f" + }, + { + "dataPath": "params_shard_139.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.35.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c4f78b8ec84425a3e341b941ef6f24e3" + }, + { + "dataPath": "params_shard_140.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.34.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.34.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.35.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.35.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.35.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.35.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.35.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.35.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "db83fb0d62a03dbb343b693c9b3fc5cc" + }, + { + "dataPath": "params_shard_141.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.36.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7a924f3546d1f17ec1d6c02f9b19f555" + }, + { + "dataPath": "params_shard_142.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.36.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "c5c3594994b05a9226d19d3a340c6ea5" + }, + { + "dataPath": "params_shard_143.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.36.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3900685cdc3ad30076e80ee20f3e1178" + }, + { + "dataPath": "params_shard_144.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.35.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.35.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.36.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.36.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.36.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.36.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.36.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.36.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "72ec0af4fc2e254a449bea119ae27628" + }, + { + "dataPath": "params_shard_145.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.37.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "30e09008fdab2b05d8c6b6fabda1ae09" + }, + { + "dataPath": "params_shard_146.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.37.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "62da39ef32ff78eb5f7ec064d182fad1" + }, + { + "dataPath": "params_shard_147.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.37.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "3334dbdfb55ccbba1c79d00187aa99f1" + }, + { + "dataPath": "params_shard_148.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.36.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.36.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.37.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.37.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.37.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.37.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.37.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.37.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "1b4d6c6929a4cdf0d213403bbe8d9319" + }, + { + "dataPath": "params_shard_149.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.38.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0a9c2f4fb77d16c8a32030ff09efc40d" + }, + { + "dataPath": "params_shard_150.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.38.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "94fc019f90a84dfcda7da1ba3ebe7ac6" + }, + { + "dataPath": "params_shard_151.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "09c19f09d6002e918ec7670c1202204d" + }, + { + "dataPath": "params_shard_152.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.37.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.37.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.38.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.38.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.38.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.38.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.38.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "968cb5915bf1099a23820a133c208ee7" + }, + { + "dataPath": "params_shard_153.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.39.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "a792204398660658c51bec123b3937e4" + }, + { + "dataPath": "params_shard_154.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.39.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0f11eb715ad7f44ef7f51cfaace1a7a1" + }, + { + "dataPath": "params_shard_155.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.38.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.38.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.39.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.39.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.39.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.39.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.39.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.39.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "71785c0d85526778a0e76e6bab66ae9e" + }, + { + "dataPath": "params_shard_156.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.40.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "e2cf6a355e7a4cb1402768cf3f7b9d0e" + }, + { + "dataPath": "params_shard_157.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.40.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "cb6fac174acd8f07f9b2a9b7d398ea55" + }, + { + "dataPath": "params_shard_158.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.40.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "dd782575907b6a133af98713ea75db47" + }, + { + "dataPath": "params_shard_159.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.39.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.39.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.40.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.40.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.40.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.40.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.40.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.40.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "507e8440576f3a909c8c165f8011bd19" + }, + { + "dataPath": "params_shard_160.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.41.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "536b89c2d5c20d3acaf2a525cbd094cf" + }, + { + "dataPath": "params_shard_161.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.41.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "0e50f98eff90a53d40c70ecb45f0f4d0" + }, + { + "dataPath": "params_shard_162.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.41.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "6fdb73ff8e45c0476640cf958ae08547" + }, + { + "dataPath": "params_shard_163.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.40.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.40.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.41.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.41.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.41.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.41.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.41.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.41.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "8b7b8fd2ccb2af5c90be8f7fbe4edc46" + }, + { + "dataPath": "params_shard_164.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.42.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "7f3e99f133fa2bb1be26519f25b18968" + }, + { + "dataPath": "params_shard_165.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.42.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "972318bb4df8bf3fa72c7507a12837da" + }, + { + "dataPath": "params_shard_166.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.42.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "53d009f3c83495ea5edf151df344e46b" + }, + { + "dataPath": "params_shard_167.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.41.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.41.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.42.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.42.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.42.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.42.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.42.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.42.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "47540bf4282a45b8c44e6503efaace9c" + }, + { + "dataPath": "params_shard_168.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.43.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "fdd80034ff28e6803426964bf71514f8" + }, + { + "dataPath": "params_shard_169.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.43.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8a61baf2ac4f95962125aeeba7f5368d" + }, + { + "dataPath": "params_shard_170.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f0e866819a68f5d71b076420c4d7f58c" + }, + { + "dataPath": "params_shard_171.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.42.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.42.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.43.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.43.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.43.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.43.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.43.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "1428f850c350fb1c568dfef088423f52" + }, + { + "dataPath": "params_shard_172.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.44.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f06bf939cceb6c89b85fab6325ea4ab3" + }, + { + "dataPath": "params_shard_173.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.44.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "559a6b462ab083907cf8c1f3248d7806" + }, + { + "dataPath": "params_shard_174.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.43.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.43.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.44.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.44.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.44.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.44.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.44.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.44.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "4e09db080c2e85c4875147cf80490457" + }, + { + "dataPath": "params_shard_175.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.45.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f1acf7809e4e703ef1ee636a9ebb0eb6" + }, + { + "dataPath": "params_shard_176.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.45.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5d9dee209e221c1e10f9200b0423f78b" + }, + { + "dataPath": "params_shard_177.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.45.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ea7068f592075fc6727b3b66c89f09e8" + }, + { + "dataPath": "params_shard_178.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.44.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.44.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.45.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.45.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.45.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.45.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.45.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.45.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "666d036005209240494e2b604efa90a7" + }, + { + "dataPath": "params_shard_179.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.46.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "372be280d9e1c5c324108b806cddb9e8" + }, + { + "dataPath": "params_shard_180.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.46.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "973bdadb45e41ccaa1ed6cabde183ff7" + }, + { + "dataPath": "params_shard_181.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.46.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "5c66412115c3681bf8d62969e6c5e699" + }, + { + "dataPath": "params_shard_182.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.45.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.45.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.46.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.46.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.46.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.46.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.46.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.46.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "3f8d3e0ce96b9000e9f66de083e91b81" + }, + { + "dataPath": "params_shard_183.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.47.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f4b813f4c8cfc72839c15d02ed4a5fca" + }, + { + "dataPath": "params_shard_184.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.47.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "e1c852be2a1eeabd3959d01c4d146dfa" + }, + { + "dataPath": "params_shard_185.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.47.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "14c8bc706d7a3c2eb9554ab35dcb8e05" + }, + { + "dataPath": "params_shard_186.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.46.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.46.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.47.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.47.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.47.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.47.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.47.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.47.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "97437794891562a9e27fac9ba64cb189" + }, + { + "dataPath": "params_shard_187.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.48.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5a302b60e3abe401db0800c4a35ff2da" + }, + { + "dataPath": "params_shard_188.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.48.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "c381af65da35742403e6049ff712dad9" + }, + { + "dataPath": "params_shard_189.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f8f4392b6d65957ec366280f8becbc84" + }, + { + "dataPath": "params_shard_190.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.47.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.47.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.48.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.48.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.48.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.48.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.48.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "a77f78b6b02f17147fc220fba6ae5baa" + }, + { + "dataPath": "params_shard_191.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.49.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "d56c1f8c87732b83ee3e1c441b0ec3bc" + }, + { + "dataPath": "params_shard_192.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.49.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "ff84fb15ec43b4a93254c2f54a5df08a" + }, + { + "dataPath": "params_shard_193.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.48.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.48.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.49.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.49.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.49.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.49.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.49.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.49.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "04b722a684398c5c6414c16f112cd24e" + }, + { + "dataPath": "params_shard_194.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.50.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "0b52e2d48fcd8c8c41c8b8fdf4309fc0" + }, + { + "dataPath": "params_shard_195.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.50.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "40105a11b07e999155683683c5e91ae2" + }, + { + "dataPath": "params_shard_196.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.50.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d9fbc19087fd63bae437f671c74efbc3" + }, + { + "dataPath": "params_shard_197.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.49.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.49.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.50.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.50.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.50.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.50.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.50.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.50.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "6cdef4a7706eb909be7d4852854b9114" + }, + { + "dataPath": "params_shard_198.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.51.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4d1f28a1b042075b87e80e545e817e0e" + }, + { + "dataPath": "params_shard_199.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.51.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9d959d9be2019e88cce53bddc33f37c7" + }, + { + "dataPath": "params_shard_200.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.51.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "0c2dff11d47a3c2e746037957ed9e92d" + }, + { + "dataPath": "params_shard_201.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.50.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.50.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.51.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.51.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.51.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.51.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.51.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.51.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "2a42a6da6a6b87131ad9ee3f3be0c35d" + }, + { + "dataPath": "params_shard_202.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.52.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "de81ef363fe75f1e1b19dab7d46a475b" + }, + { + "dataPath": "params_shard_203.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.52.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "32421725decaa744404e428da184ae02" + }, + { + "dataPath": "params_shard_204.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.52.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b10f19a00be3bd70ddafa032bedbf035" + }, + { + "dataPath": "params_shard_205.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.51.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.51.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.52.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.52.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.52.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.52.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.52.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.52.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "6c19d0a8f8332eecac4c4d42f20dff69" + }, + { + "dataPath": "params_shard_206.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.53.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5455b9d3808f79efe455a411c5a90b02" + }, + { + "dataPath": "params_shard_207.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.53.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "d182c89791e87c6999705b4acf08451a" + }, + { + "dataPath": "params_shard_208.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "4d145e344372ae0f31915278de9aff0b" + }, + { + "dataPath": "params_shard_209.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.52.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.52.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.53.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.53.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.53.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.53.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.53.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "a48fba8ba201df93c897825b4e166880" + }, + { + "dataPath": "params_shard_210.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.54.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "603a2295996572acfe9eeff16e0b7459" + }, + { + "dataPath": "params_shard_211.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.54.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "95e5b8ad825803e1134c23945d3272bb" + }, + { + "dataPath": "params_shard_212.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.53.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.53.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.54.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.54.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.54.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.54.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.54.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.54.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "aeb4fb074fc60b8fddf4085f3f379a30" + }, + { + "dataPath": "params_shard_213.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.55.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "6f443ab0c92dc0b3b3c10256ee1f3ef2" + }, + { + "dataPath": "params_shard_214.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.55.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "d610aa4a27b923ea12f5a5f7d9a3a3fc" + }, + { + "dataPath": "params_shard_215.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.55.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "b0e15a7716d7dc3c5abfa27dae3fb990" + }, + { + "dataPath": "params_shard_216.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.54.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.54.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.55.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.55.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.55.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.55.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.55.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.55.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "696e68138430dea53e97ec812956b0cf" + }, + { + "dataPath": "params_shard_217.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.56.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "920381a9af11a4475c3cd530efd93159" + }, + { + "dataPath": "params_shard_218.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.56.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "9257d1bd1b581639e8999c38a50a86bc" + }, + { + "dataPath": "params_shard_219.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.56.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "8d66dc01d60818a69bdcb2138f1ad062" + }, + { + "dataPath": "params_shard_220.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.55.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.55.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.56.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.56.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.56.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.56.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.56.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.56.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "00aefbd4e0d694dee1392084a3db7e93" + }, + { + "dataPath": "params_shard_221.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.57.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "20adcdd0d90d4869365298e7e5b4739b" + }, + { + "dataPath": "params_shard_222.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.57.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "f12a89701e0034d22939970481431fce" + }, + { + "dataPath": "params_shard_223.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.57.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "1358114cfc69880bf58a8f7c5a647c8c" + }, + { + "dataPath": "params_shard_224.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.56.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.56.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.57.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.57.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.57.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.57.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.57.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.57.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "5b8a4875d3915626676177fa0662e2b1" + }, + { + "dataPath": "params_shard_225.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.58.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "eaf16f62426a3e627d021d4179b09dde" + }, + { + "dataPath": "params_shard_226.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.58.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "ca516e64973d51f8aa906fa3234998f1" + }, + { + "dataPath": "params_shard_227.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "b4691fa8eb53e0a7a4d63737bd5e0759" + }, + { + "dataPath": "params_shard_228.bin", + "format": "raw-shard", + "nbytes": 32055296, + "records": [ + { + "name": "model.layers.57.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.57.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.58.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 13516800 + }, + { + "name": "model.layers.58.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 17940480 + }, + { + "name": "model.layers.58.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 17954816 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 18528256 + }, + { + "name": "model.layers.58.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 31635456 + }, + { + "name": "model.layers.58.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 32045056 + } + ], + "md5sum": "49bc2595398a821a5c87f8c0e8a9886b" + }, + { + "dataPath": "params_shard_229.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.59.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "f559f543787190f88694578815e95ae3" + }, + { + "dataPath": "params_shard_230.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.59.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "81bbd5f5433ba23b2f723d75a71a3b77" + }, + { + "dataPath": "params_shard_231.bin", + "format": "raw-shard", + "nbytes": 27815936, + "records": [ + { + "name": "model.layers.58.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 0 + }, + { + "name": "model.layers.58.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2211840 + }, + { + "name": "model.layers.59.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 2222080 + }, + { + "name": "model.layers.59.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 2232320 + }, + { + "name": "model.layers.59.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 4444160 + }, + { + "name": "model.layers.59.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 8867840 + }, + { + "name": "model.layers.59.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 8878080 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 8892416 + }, + { + "name": "model.layers.59.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 27242496 + } + ], + "md5sum": "15b0ad08bc72ce2cdb4576933f760fcb" + }, + { + "dataPath": "params_shard_232.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.60.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "619c7a4927768055483d660216481f90" + }, + { + "dataPath": "params_shard_233.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.60.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "5a42d330cb404b09274f6d7612e52e64" + }, + { + "dataPath": "params_shard_234.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.60.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "909081c03700a30877ed8a96acc5b4b9" + }, + { + "dataPath": "params_shard_235.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.59.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.59.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.60.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.60.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.60.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.60.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.60.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.60.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "0314825f48c118c07526868ad9d04305" + }, + { + "dataPath": "params_shard_236.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.61.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "963e7abaf2434e783299da254cc63a4f" + }, + { + "dataPath": "params_shard_237.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.61.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a8f23bdd28b53774a6bb793be20538c2" + }, + { + "dataPath": "params_shard_238.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.61.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "cd6803e7788f32207eadc85086d263ff" + }, + { + "dataPath": "params_shard_239.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.60.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.60.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.61.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.61.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.61.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.61.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.61.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.61.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "bdfd269e7825dfbf74f0754930007ca3" + }, + { + "dataPath": "params_shard_240.bin", + "format": "raw-shard", + "nbytes": 70778880, + "records": [ + { + "name": "model.layers.62.mlp.down_proj.q_weight", + "shape": [ + 3456, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 70778880, + "byteOffset": 0 + } + ], + "md5sum": "1003c01e4207a2a930ee2d04c69a5f6f" + }, + { + "dataPath": "params_shard_241.bin", + "format": "raw-shard", + "nbytes": 141557760, + "records": [ + { + "name": "model.layers.62.mlp.gate_up_proj.q_weight", + "shape": [ + 640, + 55296 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 141557760, + "byteOffset": 0 + } + ], + "md5sum": "a915b0184d49836553b7875a1d9d394a" + }, + { + "dataPath": "params_shard_242.bin", + "format": "raw-shard", + "nbytes": 18350080, + "records": [ + { + "name": "model.layers.62.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 0 + } + ], + "md5sum": "7807958fca2e915e986b965150ec3cf0" + }, + { + "dataPath": "params_shard_243.bin", + "format": "raw-shard", + "nbytes": 20760576, + "records": [ + { + "name": "model.layers.61.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.61.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.62.input_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 13516800 + }, + { + "name": "model.layers.62.mlp.down_proj.q_scale", + "shape": [ + 216, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 13527040 + }, + { + "name": "model.layers.62.mlp.gate_up_proj.q_scale", + "shape": [ + 40, + 55296 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4423680, + "byteOffset": 15738880 + }, + { + "name": "model.layers.62.post_attention_layernorm.weight", + "shape": [ + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 10240, + "byteOffset": 20162560 + }, + { + "name": "model.layers.62.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 20172800 + }, + { + "name": "model.layers.62.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 20187136 + } + ], + "md5sum": "b5d7d22cb6ff6c8fc86f429862d8c29a" + }, + { + "dataPath": "params_shard_244.bin", + "format": "raw-shard", + "nbytes": 32454656, + "records": [ + { + "name": "model.layers.62.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.62.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + }, + { + "name": "model.layers.63.self_attn.c_attn.bias", + "shape": [ + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 14336, + "byteOffset": 13516800 + }, + { + "name": "model.layers.63.self_attn.c_attn.q_weight", + "shape": [ + 640, + 7168 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 18350080, + "byteOffset": 13531136 + }, + { + "name": "model.layers.63.self_attn.c_attn.q_scale", + "shape": [ + 40, + 7168 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 573440, + "byteOffset": 31881216 + } + ], + "md5sum": "b0109ad1c92f3a639698dd397daad203" + }, + { + "dataPath": "params_shard_245.bin", + "format": "raw-shard", + "nbytes": 13516800, + "records": [ + { + "name": "model.layers.63.self_attn.o_proj.q_weight", + "shape": [ + 640, + 5120 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13107200, + "byteOffset": 0 + }, + { + "name": "model.layers.63.self_attn.o_proj.q_scale", + "shape": [ + 40, + 5120 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 13107200 + } + ], + "md5sum": "edd7bc24dc83152620952be83d80b49f" + } + ] +} \ No newline at end of file