illian01's picture
Add weights and config
790e1b5
{
"metadata": {
"ParamSize": 125,
"ParamBytes": 1513693184.0,
"BitsPerParam": 4.500437647753687
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32000,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "a5721d9560034ebdaf9d54846920d333"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 33357824,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
32000,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192000,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 8192000
}
],
"md5sum": "5552f5612454729423354be2af931df1"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "ba27e916567a0c6f37f181aa8df94d1f"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "734f8aa85a33629a93177a47c054492e"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "13ff761572920e237be3b62d59cbd4e0"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "1d1b5df3e123de5bc1fbb1993222d84b"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "95d00be34be16fcf3a2fe2302724e82f"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a422e8e8913d0267583354c6daa769a3"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 1048576
},
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 6684672
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 29229056
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32047104
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "b7017825ad7f81708e314fcf9d7cb0de"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "4b91662b6e1ee4e7b38296ec31b72bb1"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "852bbb7402bea2c0b6be3265d3d3c85b"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7e293d65ec025332caa0bbf0809fd83c"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "05b55ef5f5e542eae1880b70a3058648"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "7a434038b09c4c078fcf530723227ae7"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d568ff3dafce1741101c8e53b9bf5782"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 1048576
},
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 6684672
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 29229056
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32047104
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "b8783055d9133af4581ec9c708e2e387"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "f7e0999619a4b8fcf8ed2c4d3a43dc84"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "760ea2fa6f4a377b77a3ea3931c01a9d"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "70a3d08ba73b8b54807c494e0fec7c64"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "32bfcbd2bab849060a7708b6a7c733b9"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "e8d3b2a91b3f9dcdbafe5429b4ef3532"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "1fa932714da6576f3d25c1d8f2383755"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 1048576
},
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 6684672
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 29229056
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32047104
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "1192f4b46399ee1f4117a3873193a3cb"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "fff6ec2b589e78fbac6e8f71c361c792"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "6348bbf89535736f69b5fbaf16beaffb"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "97a5111ae4c3eb75a706632a87dbb4b6"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "17b99865e224249e9c4a5724abef7548"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "dee2f47ac2113817449ad4037f14cc63"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "7a5feeebfe6442bcc4138fca9dc50b88"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 1048576
},
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 6684672
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 29229056
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32047104
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "b00d0bdc57592edb480b7924e7c93bfa"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "8d7931724b0b7b94de937af5b59bcc8f"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "8cd54da1c291ad7d61883e5e4aba30f8"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c8caa63d672fe772e6b9a4c4146d8671"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "622ead990a574bd531048c9fe80f6b7c"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "12055f1adbfb791eebbc0078fe8e5858"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ae6ac334b031863772956ded167529c6"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 32063488,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 1048576
},
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 6684672
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 29229056
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32047104
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
}
],
"md5sum": "ae02657a6091f73cee639fad0a51c79c"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "2cfacefc2fa03ee433b1f6cefc838b4c"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 0
}
],
"md5sum": "ef213324fb1862847cc91132225feb63"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
512,
12288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f9c0976e25f3666e26528100ac3b1692"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 32587776,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 3145728
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 11534336
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 12582912
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 18219008
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21037056
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21045248
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
128,
12288
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 3145728,
"byteOffset": 21053440
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
512,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 24199168
}
],
"md5sum": "05237e2904fb62b138e1e6ea5c3496cd"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 45088768,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
512,
22016
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 45088768,
"byteOffset": 0
}
],
"md5sum": "d309b28471e4124e03531f6806631ce5"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 65536000,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
512,
32000
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65536000,
"byteOffset": 0
}
],
"md5sum": "ede184766099ed6415755f6629949e57"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 32071680,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
128,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1048576,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
128,
22016
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5636096,
"byteOffset": 1048576
},
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
1376,
4096
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 22544384,
"byteOffset": 6684672
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
344,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2818048,
"byteOffset": 29229056
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32047104
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32055296
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32063488
}
],
"md5sum": "89d5e83b4da202f7bec532285ca1e80e"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 8192000,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
128,
32000
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192000,
"byteOffset": 0
}
],
"md5sum": "3503f06e614550cba195b922d158f64c"
}
]
}