diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,4447 @@ +{ + "metadata": { + "ParamSize": 325, + "ParamBytes": 3790773760.0, + "BitsPerParam": 4.500454370006109 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 65550336, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 32007, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65550336, + "byteOffset": 0 + } + ], + "md5sum": "266608fff322e7bd18a1273d1a812b4c" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 30746368, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 32007, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8193792, + "byteOffset": 0 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8193792 + }, + { + "name": "model.layers.29.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 8201984 + } + ], + "md5sum": "a9dc89df365b995910c8bbd6bce3d663" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.29.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "d2eea36c7fc0d60bd5b04b67ccbf06d4" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 31014912, + "records": [ + { + "name": "model.layers.29.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8462336 + }, + { + "name": "model.layers.30.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 8470528 + } + ], + "md5sum": "a58f2f78f8b5666aa0a8b43d5d861077" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.30.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "eda97af9f21c91c775334d562572f0b5" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.30.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "335cfa4652d2ee1f1841ccd68c14afaf" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.31.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "af923680911a7cc10a0146ea04cc3eda" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.31.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "5614d85eea0bcb3a1b65bb41a7682d20" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.31.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c68c62e483cb0beab97af837fce0020d" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.30.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.30.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.30.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.30.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.31.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.31.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.31.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "1fae86c412ecfade7f933f2e0b5a7ab9" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 65550336, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32007, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65550336, + "byteOffset": 0 + } + ], + "md5sum": "8e1b8871b7a927de3fed63413ffa636b" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "0468703120c163c2d649a8b4795aad97" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "b94a0bbc165f3a1c22348fc93c3e9dda" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.0.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e1a1e482cd33ec5464f6cc57fa0d55af" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 29255424, + "records": [ + { + "name": "model.layers.31.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32007, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8193792, + "byteOffset": 9445376 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 17639168 + }, + { + "name": "model.layers.0.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 17647360 + }, + { + "name": "model.layers.0.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 20465408 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26101504 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 26109696 + } + ], + "md5sum": "318673e2687961cf1ec8cb637fda492f" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.1.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "19a7b566f20ecef4087d5320704fc167" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "e58bd03849b0c17793b9f9c15b034a6a" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "a835018e937d70418380f7fe3eba5420" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "5ba68c3731978fa6a1dff3e8e5411fba" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "44491e563e434039880590b9ebc6d27e" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "11b5668c53ee8157b979309d59e238f1" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.1.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.1.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.2.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.2.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.2.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "65d3286dbd9987e124c1ff60827a94cd" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.3.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "34e7fa118a48936d43b1d8ee2c5682cd" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "7e4cd3f001f8ea211bbaefcf1683f29f" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c314c98a352cec38709430db644938b2" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "977d494af7c663a53053f140495dafbf" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "42df3b161cb78214d61297656a804255" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "05f6ec317f722a34fd05c08c8dc0e0ff" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.3.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.3.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.4.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.4.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.4.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "ae64eb8579f4dc6fe28776b986654255" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "dfd3fbf098e1f307508cb3c574c0567f" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "b424d5f6e180088164a6b43a65ae245a" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "d1c84806a6d89bedf614f626c25fec65" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "dbda527d40bc641d3dd9c5cba29c9de5" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 30490624, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.5.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 9437184 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 12582912 + }, + { + "name": "model.layers.5.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 20971520 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22020096 + }, + { + "name": "model.layers.10.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 22028288 + }, + { + "name": "model.layers.10.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 24846336 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 30482432 + } + ], + "md5sum": "bfb18d52b094959d5c4c1ebaad2ec052" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9a7dcfc53d703dcaeede276240633937" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "f99301d876dcf6df9aebd2f6ec4fe4fa" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "78295bf4f706d74ad609448bf06074d6" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 27992064, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.10.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.11.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 12582912 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 15728640 + }, + { + "name": "model.layers.11.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 24117248 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25165824 + }, + { + "name": "model.layers.5.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 25174016 + } + ], + "md5sum": "bf362963bd5954b7ac8812393b77ff98" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "1f48e009d7d8e9e0a1c69005032e3de7" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 31014912, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5636096 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5644288 + }, + { + "name": "model.layers.6.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 5652480 + }, + { + "name": "model.layers.6.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 28196864 + } + ], + "md5sum": "df8fe2a7bb20b1dc8522928c22df0d81" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 30810112, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 0 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5636096 + }, + { + "name": "model.layers.6.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 5644288 + } + ], + "md5sum": "0a938f9e375cb9da7a0c295c86aea1c5" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "711d9e76da8d3cf4e4bdab4385d1f59c" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "63c0f3a04ca428e709ca30c784c29469" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "090a342242461ac66b716627c42c8dad" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.6.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.7.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.7.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.7.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.7.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "45bc628ceab716351f5dddd1fe54ff97" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "aad09f3c7a5f4e3f403113aaee13ed20" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9683d6ba4813973bf6256946804f5bba" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 32063488, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.8.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.8.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.8.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + } + ], + "md5sum": "7ff61371b921110f5b085df6b84c5af2" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "7479f0f4a9af1eac900505a1531c9fce" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "c572115e79008be1d01ca64658361892" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "812074ed76addf828fc9b21d4fb64731" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 32587776, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 3145728 + }, + { + "name": "model.layers.8.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 11534336 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 12582912 + }, + { + "name": "model.layers.9.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 12591104 + }, + { + "name": "model.layers.9.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 15409152 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.9.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21053440 + }, + { + "name": "model.layers.9.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24199168 + } + ], + "md5sum": "83b2951af7b46cd46262a93a12f7f3b3" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "68b414d6393eeb23f1dbe1f3c0a41804" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "ed107b4385ef9ae386bfbe244f0137fd" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 32071680, + "records": [ + { + "name": "model.layers.9.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.11.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.11.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.11.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32063488 + } + ], + "md5sum": "050fad8c53d1250a06ae7a4ab0a65eff" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "326e787333434006f6df467032146c80" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d82d46af525eaee16d7559c68361718d" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "68d62f6cee74f9398de68605c6907415" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "3d8f3a67d9cbbee3b8a81b120e7f9594" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9534516f6f0af239641d2afa1f1da83a" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.12.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.12.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.13.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.13.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.13.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "f35c3e8b78533b12da16eedccd8436a6" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.13.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.14.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "6f6a93f22d43457ae2307f792d46e1d3" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "dc161fde2900ec135165eacd9cc83636" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e75d54b937889d8f2cfc4d8a20ee7c92" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "aebb9e5dacec242850c7fc6b88291f58" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "361e434c8a567806b4c81a39bddf44db" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "0a614a43d0f03f4133d967f5c3cec5c0" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.14.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.14.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.15.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.15.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.15.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "fa73c4338a600e4c9720c74d7142f15c" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.15.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.16.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "e644510c3dd2ed61eeb714d0e9048df7" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "e3964d36343d868fcd2e1e34760d3c1e" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "45cb3de2617ca0f6bc7c031270afae3b" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2021340000b9a5282e369ba3c1fe37f0" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 32579584, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.16.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.16.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.17.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21045248 + }, + { + "name": "model.layers.17.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24190976 + } + ], + "md5sum": "8903f36d0a95fe1c96a992e8adb20780" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ce006b212d7a4c3d49fe10f9afb4f6aa" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "1a0fd9b24ad38136f25e8f2f7d75c74c" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 32071680, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.17.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.17.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.17.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32063488 + } + ], + "md5sum": "c8611e2cdc8a11d80bc84c19344ea7a7" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "6b6c24f5d95e1a1410df95475c7bff4d" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "5ce8dcbffb8a2a7e862750f3d9167d7f" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "ee69353fd464ab6161450105d71eb9ee" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "334d1861cfbfbce4969b562e668a5ea5" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ed9e6a7e6ca3fe06e987180a9cbff88d" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.18.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.18.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.19.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.19.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.19.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "abac1ec7c3dc814ec63eaa05bb9f1a30" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.19.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.20.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "fa906c7963e4c4cdda2f8c9596de918e" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "dff23aa8696167b8cc4db42ff53b8bf6" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d659ee990fbda985a84ecf36e9f364ac" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "252797d7c9377c88848b1b0e1cd8ab4b" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ed13c9068721d7b5f2603aa8c1c7309a" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ab38282e44c5cc77b54d452475963c02" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.20.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.20.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.20.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.21.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.21.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.21.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "83e1ac7aae6e03c4cc47f3e7c52df59b" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.21.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.22.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "59e454cd88aed9a7474a8bfb3875746c" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "6fa1b6913b616506682189c625a684f0" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "09249f754971edc0a54b61da63f01919" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "42f19fc8402fb64069170658629e2f6e" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 32579584, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.22.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.22.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.23.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21045248 + }, + { + "name": "model.layers.23.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24190976 + } + ], + "md5sum": "693b85c7afaebc28a693fca2cef553c3" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "60a8d8a2fc71e84b67005516535e6998" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "11fb97763801bf39d36c5d914e8a213d" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 32071680, + "records": [ + { + "name": "model.layers.23.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 1048576 + }, + { + "name": "model.layers.23.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 1056768 + }, + { + "name": "model.layers.23.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 23601152 + }, + { + "name": "model.layers.23.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 26419200 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32055296 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 32063488 + } + ], + "md5sum": "4937d10d21fb893f09dc58e10da768f6" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.24.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "8a91b9d107a77d3445b032b907a0d185" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.24.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "202971639ea02cdbca1444208acad436" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.25.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "fbf14a73d50ca80eadf471fe273e9fef" + }, + { + "dataPath": "params_shard_100.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.25.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ae49295c247e3c902a6e1bfae946b269" + }, + { + "dataPath": "params_shard_101.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.25.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8063dee15370b118fc674eef0da9b179" + }, + { + "dataPath": "params_shard_102.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.24.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.24.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.24.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.25.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.25.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.25.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "08264f214f713f8de5f2ece2d633a515" + }, + { + "dataPath": "params_shard_103.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.25.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.26.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "41853d5d118420d6e34a7b5a3c9c2784" + }, + { + "dataPath": "params_shard_104.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.26.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "ef7d2b3ab4c39dfb7d1facf013981009" + }, + { + "dataPath": "params_shard_105.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.26.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "b81c5953499083c93d0779deb91408bb" + }, + { + "dataPath": "params_shard_106.bin", + "format": "raw-shard", + "nbytes": 22544384, + "records": [ + { + "name": "model.layers.27.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 0 + } + ], + "md5sum": "6fe9c0c0c2a4409e6456a9ab98380e94" + }, + { + "dataPath": "params_shard_107.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.27.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "17fc6ee529e3682845df68c7c6f8f520" + }, + { + "dataPath": "params_shard_108.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.27.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d8154f5025d07538da9dc0d622aa76ba" + }, + { + "dataPath": "params_shard_109.bin", + "format": "raw-shard", + "nbytes": 32661504, + "records": [ + { + "name": "model.layers.26.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.26.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.26.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.26.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 21045248 + }, + { + "name": "model.layers.27.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 21053440 + }, + { + "name": "model.layers.27.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 23871488 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 29507584 + }, + { + "name": "model.layers.27.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 29515776 + } + ], + "md5sum": "7b1f4a9c96568931b296fcc2fba5d64c" + }, + { + "dataPath": "params_shard_110.bin", + "format": "raw-shard", + "nbytes": 31989760, + "records": [ + { + "name": "model.layers.27.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 8388608 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 9437184 + }, + { + "name": "model.layers.28.mlp.down_proj.q_weight", + "shape": [ + 4096, + 1376 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 22544384, + "byteOffset": 9445376 + } + ], + "md5sum": "2465fa3761f9e685bf2077ff63103bb0" + }, + { + "dataPath": "params_shard_111.bin", + "format": "raw-shard", + "nbytes": 45088768, + "records": [ + { + "name": "model.layers.28.mlp.gate_up_proj.q_weight", + "shape": [ + 22016, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 45088768, + "byteOffset": 0 + } + ], + "md5sum": "65ccc9a19028ec53aadcad0935e04845" + }, + { + "dataPath": "params_shard_112.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.28.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "3a8bae8195dcf7287137e61a62de2768" + }, + { + "dataPath": "params_shard_113.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.29.self_attn.qkv_proj.q_weight", + "shape": [ + 12288, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "6cd201d641b9e2207ca8399198da1421" + }, + { + "dataPath": "params_shard_114.bin", + "format": "raw-shard", + "nbytes": 32579584, + "records": [ + { + "name": "model.layers.28.mlp.down_proj.q_scale", + "shape": [ + 4096, + 344 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 2818048, + "byteOffset": 0 + }, + { + "name": "model.layers.28.mlp.gate_up_proj.q_scale", + "shape": [ + 22016, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 5636096, + "byteOffset": 2818048 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8454144 + }, + { + "name": "model.layers.28.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 8462336 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 11608064 + }, + { + "name": "model.layers.28.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 19996672 + }, + { + "name": "model.layers.29.self_attn.qkv_proj.q_scale", + "shape": [ + 12288, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 3145728, + "byteOffset": 21045248 + }, + { + "name": "model.layers.29.self_attn.o_proj.q_weight", + "shape": [ + 4096, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 24190976 + } + ], + "md5sum": "dde1156f70ea5f59430dc652f3d4169b" + }, + { + "dataPath": "params_shard_115.bin", + "format": "raw-shard", + "nbytes": 1048576, + "records": [ + { + "name": "model.layers.29.self_attn.o_proj.q_scale", + "shape": [ + 4096, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1048576, + "byteOffset": 0 + } + ], + "md5sum": "e39bea2450f836f7980525663398fdf8" + } + ] +} \ No newline at end of file