|
{ |
|
"metadata": { |
|
"ParamSize": 135, |
|
"ParamBytes": 4400242688.0, |
|
"BitsPerParam": 32.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 131084288, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.weight", |
|
"shape": [ |
|
32003, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131084288, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cc0ed85be2a8c0317b9701355b06d552" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3dd515cad2c62e6956cb338bfada11e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1af86eb61f0d2fa2466465dea8dbc9ea" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29368320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18878464 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 18882560 |
|
} |
|
], |
|
"md5sum": "f9bed0391d17e40343e2c90c45fff822" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "97d2718af5ea9779f503379c2556e516" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31465472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31457280 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31461376 |
|
} |
|
], |
|
"md5sum": "4335e0565f1c118752b5d18bf96f239f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5d59c33ffddda7bcca41c5c9ad65db51" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "225dd6d9dd691ca0defb95bf03045fcd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29368320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18878464 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 18882560 |
|
} |
|
], |
|
"md5sum": "24956e7e3440d13c787912f075b91808" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ccf10355f04707b6ea13fe1905848786" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31465472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31457280 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31461376 |
|
} |
|
], |
|
"md5sum": "63a5e91ead395e5212dec520fe6eb3b9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bd1768bd70d7a687d57c6eb5ad700f98" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "378e8ccbebfe1c7c5a6abd44e17a4b33" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29368320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18878464 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 18882560 |
|
} |
|
], |
|
"md5sum": "69ae4f8c3d765f016cc84e46902a5a61" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "21882b96e7a6182502df1c8409f5ecf4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31465472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31457280 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31461376 |
|
} |
|
], |
|
"md5sum": "94fb7197266794b1d62a683e6f83a8f5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bea00f9a26131c903821b452f4d9fea1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "233adc3d89e9fd0e63e8c024f2656d18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29368320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18878464 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 18882560 |
|
} |
|
], |
|
"md5sum": "abbe67b5d794b62799b3bcbdaf1d2f13" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b9925f234796242b06783b10faa4f066" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31465472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31457280 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31461376 |
|
} |
|
], |
|
"md5sum": "449553c5c774c9612c71db8e6fe0408e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2d45e9f3e8159430be240362d67ad6af" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "62d066a0af5c29b2e152d1f0e1fa83a5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29368320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18878464 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 18882560 |
|
} |
|
], |
|
"md5sum": "7bc70a5644474679d9b8c803f86803e6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a385c498afbccecefdb8faefc328a383" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31465472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31457280 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31461376 |
|
} |
|
], |
|
"md5sum": "482f31eacf412fc7c13d5fef45ef2909" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "850943605e71e9be8f0691c3b55959d3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9207fc9eb5cd6be8fac65a1c7685ff73" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29368320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18878464 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 18882560 |
|
} |
|
], |
|
"md5sum": "f1212bb094d6783e2fc69b90346e22ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "736af883b80453625d8250b9aa77ac26" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31465472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31457280 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31461376 |
|
} |
|
], |
|
"md5sum": "36986d44545c8f12516cdaefdb74694b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5f97fe1675d789ee5a5a5881b1775515" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "051b501162303570dff375b79148951d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29368320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18878464 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 18882560 |
|
} |
|
], |
|
"md5sum": "4ae2730f899b0d292e208596ea98b590" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "15f42972b33f6cadbc0b5a17e20ac9b6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31465472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31457280 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31461376 |
|
} |
|
], |
|
"md5sum": "82ec8f8938814a4e75058620d84285c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ccd48809d794270ecb6929d9547aea97" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d0ce7dab232e7305f3f243febbb4324c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29368320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18878464 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 18882560 |
|
} |
|
], |
|
"md5sum": "0f007605b9207b32a5c79249505897ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e42d4c04fb873f035b307b92df26974e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31465472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31457280 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31461376 |
|
} |
|
], |
|
"md5sum": "7168ff2801ea67e99469b52abb83f25d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "623e1a60bfb63c0d49297304aa0aed3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c4c8058c508560fd9f604e942dd3caa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29368320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18878464 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 18882560 |
|
} |
|
], |
|
"md5sum": "80dec21b661afcd092d85dcde0f0ec8a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9242db19ee58801b87f6cdd63fd1139b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31465472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31457280 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31461376 |
|
} |
|
], |
|
"md5sum": "f83f0c6e381c128e2cbdc59dfd54fa47" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2663610937313216a42645bc74895a87" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "973defd93492a1311559695a1c45a149" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29368320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18878464 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 18882560 |
|
} |
|
], |
|
"md5sum": "a3274aef0ddf44dbd215f1588d7a8bfe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d8b408bc677af3eb67cca3d5f31151e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31465472, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31457280 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31461376 |
|
} |
|
], |
|
"md5sum": "0ea6e376f976c09ec4c316553e5fe49f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c433565a3056b5b776c65a1536c565bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 23068672, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b51d01988351e7bba03eb34e46a2585" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 29368320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 10485760 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 18878464 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.weight", |
|
"shape": [ |
|
2560, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10485760, |
|
"byteOffset": 18882560 |
|
} |
|
], |
|
"md5sum": "65304737b5497527ce7b0d2377866424" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 46137344, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
11264, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 46137344, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6e5cd3b68492da8c01ef96dc86faa8a7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 131084288, |
|
"records": [ |
|
{ |
|
"name": "lm_head.weight", |
|
"shape": [ |
|
32003, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131084288, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cbafe195db523a0e9302dd15bde0f529" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31469568, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.weight", |
|
"shape": [ |
|
2048, |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 8388608, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.weight", |
|
"shape": [ |
|
2048, |
|
5632 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 23068672, |
|
"byteOffset": 8388608 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31457280 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31461376 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
2048 |
|
], |
|
"dtype": "float32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4096, |
|
"byteOffset": 31465472 |
|
} |
|
], |
|
"md5sum": "2d491aad8e6aa64a244bca58e8049b34" |
|
} |
|
] |
|
} |