diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4705 @@ +{ + "metadata": { + "ParamSize": 390, + "ParamBytes": 1572915200.0, + "BitsPerParam": 4.501369085231279 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 64389120, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 50304, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 64389120, + "byteOffset": 0 + } + ], + "md5sum": "33bb513b28f36ebb6c9352edf514e607" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 64389120, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 50304, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 64389120, + "byteOffset": 0 + } + ], + "md5sum": "4c6c33b611b88517efad9ca0b0c38e10" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "92cf54de400a4e0bbb31141cafbc2226" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 28282880, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 50304, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8048640, + "byteOffset": 0 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 50304, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8048640, + "byteOffset": 8048640 + }, + { + "name": "model.layers.0.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 16097280 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 16102400 + }, + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 16107520 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 24954880 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 26060800 + }, + { + "name": "model.layers.0.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 28272640 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 28277760 + } + ], + "md5sum": "5bcca722f877257454d7af65489506a5" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "bba45ba957851bfeea1b0776d1324392" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.1.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.1.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "56c70c8ef90b94ce8b9f4cca52130ac3" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "0cfcdcba9657928fab48cfd882b93dfd" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.10.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.10.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "7273a1f1f9582b275e75e24808f7361a" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "4931463e5f784c461e4f59e63a4aee2c" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.11.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.11.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "5bc677a59aacc8552f8f35c3bcd02fe9" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "99acef9e7f788aa450afa6dd81a91ce5" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.12.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.12.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "44c41ed8e6b4b83ddee55003cf9e0114" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "d51e6fca6ff6cf3427f5bc65131004d4" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.13.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.13.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "8ff6902fcca489e878edfe289075cd40" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "1cf12630da5a449e124491e2ba583927" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.14.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.14.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "ebdccdef2fcc4b66a19e4de6bbe1ba2c" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "46a46dfa568f54180eb34ab92b3b8e53" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.15.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.15.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "c216b9a10b71cd3216d15d975d45541a" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "fb9f33b76a27b0c42cbbbcd8bf64bea2" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.16.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.16.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "e9e5c9757bdaf2b9138780e8902f51d3" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "de1bb327af6fcf9febf1eda8c7d7b959" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.17.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.17.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "d532e6af81ebcf82877a8cbdf6496e82" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "1fd755cc97ad0b885c3afac6e4cdcc26" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.18.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.18.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "deb384ae3635b4cba177e80ed78ebf1c" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "5b987afcc196c12dbd647beea2319158" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.19.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.19.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "e9bc895d566d236c5eb14cd03e4bcbe7" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "0b941962d2692fb3933ca78d6d8c99b3" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.2.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.2.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "cc00c351e78f57dd5cd6aaf8161a26a0" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "92d2613b3ca45897ad589fd9e78c0c6c" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.20.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.20.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "2bd0671a3e6ff0ced48ded95c10838da" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "24a8cb1f40e995b68c11abcf481e54d7" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.21.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.21.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "7115779964bd09697a9716b2b8ff159f" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "5c6733f9724f3fccc5a14846d7102fec" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.22.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.22.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "b9bc007e9f3fcf2d0d6ae2b002ff43b6" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "d800838d4a53c3b910c62d2715bde687" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.23.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.23.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "452ebe06ec5b577b81a820b5ca4c248d" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "382ab9423010e3a4f755249d6237690e" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.24.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.24.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "71971bda7d8d1f7b96b2b3d477bfd4f3" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "f34ab2dfec684f3ecf8953fae6f6557f" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.25.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.25.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "a25663a81acb8911289d7f92f75c5e7b" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "c015cd1ff73f7de72aa2b55988b7ea3b" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.26.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.26.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "7c766ae6ba486451e80b5e394f741b35" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "282ceccdcfabf9ff53b2d8885d2c2d07" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.27.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.27.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "472a5d5f8c4c0c145c8c8ed570fbc16c" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "4cc3bd4cfee9d1b30a4dd3612e7d7e49" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.28.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.28.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "bc091d5859c2569e14676153dd819caa" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "697f7667dacd874d63be96baf77eb915" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.29.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.29.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "857cdd01c1420567e9ea5f74c3d25b44" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "3b190d8f2179d9ce0c931cc28b7e1c64" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.3.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.3.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "9f304494424994c92fb500044f3538d3" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "81413d32bfa84670dcff7da4012ea78f" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.30.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.30.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "2ece0a5d7949574d1554d1f82242f9a2" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "c623c66c1d6d752fdef8fd3fa4492d07" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.31.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.31.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "44c0b0ca47fa2b735f73fed07ee464b2" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "9d8c42e04066ced497e9eaa6f88718fb" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.4.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.4.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "654a7520c445991c7ada77398fc227c5" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "aa9a6eda73762ad531b256e8637b75ae" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.5.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.5.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "4f69964b0e5e14a98c9b4ec795dfb6bf" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "1d2d72e76c3d037c169a34dd2b231b09" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.6.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.6.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "12cad3b1fb76cac16072846768767f66" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "4c8cbe42728a47514f9ce4a3b3bce476" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.7.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.7.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "c16515be9ab69668a01072dd283ac634" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "62bee20bf1f9fc7f3910c8d8d48ec7b8" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.8.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.8.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "55c32ffbed6edd675625f5646120e94b" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 17694720, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 13824, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 17694720, + "byteOffset": 0 + } + ], + "md5sum": "a20bf1433300232971c943cac71ea494" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 26931200, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.layers.9.input_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + }, + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 2560, + 864 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8847360, + "byteOffset": 14755840 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 2560, + 216 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1105920, + "byteOffset": 23603200 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 13824, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2211840, + "byteOffset": 24709120 + }, + { + "name": "model.layers.9.post_attention_layernorm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26920960 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 26926080 + } + ], + "md5sum": "644f39d26f65816bb7f7d277e55c1f4c" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 14755840, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 7680, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 9830400, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 7680, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1228800, + "byteOffset": 9830400 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 2560, + 320 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3276800, + "byteOffset": 11059200 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 2560, + 80 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 409600, + "byteOffset": 14336000 + }, + { + "name": "model.norm.bias", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14745600 + }, + { + "name": "model.norm.weight", + "shape": [ + 2560 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5120, + "byteOffset": 14750720 + } + ], + "md5sum": "340ab1623dce958bcb319ebe41c5f3af" + } + ] +} \ No newline at end of file