diff --git a/mlc-chat-config.json b/mlc-chat-config.json
new file mode 100644
index 0000000000000000000000000000000000000000..7b7a8144cb2cca053aaa71ca2cfdb4391f611901
--- /dev/null
+++ b/mlc-chat-config.json
@@ -0,0 +1,77 @@
+{
+ "version": "0.1.0",
+ "model_type": "orion",
+ "quantization": "q4f16_1",
+ "model_config": {
+ "hidden_size": 5120,
+ "intermediate_size": 15360,
+ "num_attention_heads": 40,
+ "num_hidden_layers": 40,
+ "rms_norm_eps": 1e-05,
+ "vocab_size": 84608,
+ "position_embedding_base": 10000.0,
+ "context_window_size": 4096,
+ "prefill_chunk_size": 4096,
+ "num_key_value_heads": 40,
+ "head_dim": 128,
+ "tensor_parallel_shards": 1,
+ "max_batch_size": 128
+ },
+ "vocab_size": 84608,
+ "context_window_size": 4096,
+ "sliding_window_size": -1,
+ "prefill_chunk_size": 4096,
+ "attention_sink_size": -1,
+ "tensor_parallel_shards": 1,
+ "pipeline_parallel_stages": 1,
+ "temperature": 0.3,
+ "presence_penalty": 0.0,
+ "frequency_penalty": 0.0,
+ "repetition_penalty": 1.05,
+ "top_p": 0.9,
+ "tokenizer_files": [
+ "tokenizer.model",
+ "tokenizer_config.json"
+ ],
+ "tokenizer_info": {
+ "token_postproc_method": "byte_fallback",
+ "prepend_space_in_encode": false,
+ "strip_space_in_decode": false
+ },
+ "conv_template": {
+ "name": "orion",
+ "system_template": "{system_message}",
+ "system_message": "",
+ "system_prefix_token_ids": [
+ 1
+ ],
+ "add_role_after_system_message": true,
+ "roles": {
+ "user": "Human: ",
+ "assistant": "Assistant: "
+ },
+ "role_templates": {
+ "user": "{user_message}",
+ "assistant": "{assistant_message}",
+ "tool": "{tool_message}"
+ },
+ "messages": [],
+ "seps": [
+ "\n\n",
+ ""
+ ],
+ "role_content_sep": "",
+ "role_empty_sep": "",
+ "stop_str": [
+ ""
+ ],
+ "stop_token_ids": [
+ 2
+ ],
+ "function_string": "",
+ "use_function_calling": false
+ },
+ "pad_token_id": 0,
+ "bos_token_id": 1,
+ "eos_token_id": 2
+}
\ No newline at end of file
diff --git a/ndarray-cache.json b/ndarray-cache.json
new file mode 100644
index 0000000000000000000000000000000000000000..a5876e264095d91e0245bed3af3df43193b7a56c
--- /dev/null
+++ b/ndarray-cache.json
@@ -0,0 +1,6585 @@
+{
+ "metadata": {
+ "ParamSize": 486,
+ "ParamBytes": 8156712960.0,
+ "BitsPerParam": 4.50065789055498
+ },
+ "records": [
+ {
+ "dataPath": "params_shard_0.bin",
+ "format": "raw-shard",
+ "nbytes": 216596480,
+ "records": [
+ {
+ "name": "lm_head.q_weight",
+ "shape": [
+ 84608,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 216596480,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "70fa49eb3c493d2b51677dd2c8c77761"
+ },
+ {
+ "dataPath": "params_shard_1.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.27.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "b71502a403a980c670d6417e838740ff"
+ },
+ {
+ "dataPath": "params_shard_2.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.28.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "099a4e0752317477cc9fadbb7df25589"
+ },
+ {
+ "dataPath": "params_shard_3.bin",
+ "format": "raw-shard",
+ "nbytes": 32051200,
+ "records": [
+ {
+ "name": "lm_head.q_scale",
+ "shape": [
+ 84608,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 27074560,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.27.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 27074560
+ },
+ {
+ "name": "model.layers.27.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 27084800
+ },
+ {
+ "name": "model.layers.27.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 27095040
+ },
+ {
+ "name": "model.layers.27.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 32010240
+ },
+ {
+ "name": "model.layers.27.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 32020480
+ },
+ {
+ "name": "model.layers.28.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 32030720
+ },
+ {
+ "name": "model.layers.28.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 32040960
+ }
+ ],
+ "md5sum": "1df1e2b812049ba6f9349c825d3cdcdd"
+ },
+ {
+ "dataPath": "params_shard_4.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.28.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "77cdf350c414d4447ebed8384c81d1d4"
+ },
+ {
+ "dataPath": "params_shard_5.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.28.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "d58bc20403e1b6c974ed3bcb98bd7fcd"
+ },
+ {
+ "dataPath": "params_shard_6.bin",
+ "format": "raw-shard",
+ "nbytes": 32788480,
+ "records": [
+ {
+ "name": "model.layers.28.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.28.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.28.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.28.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.28.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.28.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "d4b5ac1c5d910a699b7417469f66ddbf"
+ },
+ {
+ "dataPath": "params_shard_7.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.29.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "cdd4e35f71c346e957c0709a99c6b27c"
+ },
+ {
+ "dataPath": "params_shard_8.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.29.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "d8e5c5576272574911ee061347fa813f"
+ },
+ {
+ "dataPath": "params_shard_9.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.29.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "9df59716db410f30a855dde87ba4e4a4"
+ },
+ {
+ "dataPath": "params_shard_10.bin",
+ "format": "raw-shard",
+ "nbytes": 21340160,
+ "records": [
+ {
+ "name": "model.layers.28.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.29.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1638400
+ },
+ {
+ "name": "model.layers.29.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1648640
+ },
+ {
+ "name": "model.layers.29.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 1658880
+ },
+ {
+ "name": "model.layers.29.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 6574080
+ },
+ {
+ "name": "model.layers.29.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16404480
+ },
+ {
+ "name": "model.layers.29.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16414720
+ },
+ {
+ "name": "model.layers.29.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 16424960
+ }
+ ],
+ "md5sum": "9d98fe072d0c0fb7593910ac098bd9a0"
+ },
+ {
+ "dataPath": "params_shard_11.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.30.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "a60593c247608aa59b744908943a652c"
+ },
+ {
+ "dataPath": "params_shard_12.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.30.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "63b125f3e073b22451b0bd0373a518e1"
+ },
+ {
+ "dataPath": "params_shard_13.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.30.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "6783da5be01982b821f971e919ee1741"
+ },
+ {
+ "dataPath": "params_shard_14.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.29.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.29.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.30.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.30.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.30.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.30.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 19681280
+ },
+ {
+ "name": "model.layers.30.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.30.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "991e3d83e4f28c59bae264c1e9db84aa"
+ },
+ {
+ "dataPath": "params_shard_15.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.31.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "3c502e17045d4668a017badca4f127fb"
+ },
+ {
+ "dataPath": "params_shard_16.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.31.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "694e56e2339659b79e5a2a194cb49063"
+ },
+ {
+ "dataPath": "params_shard_17.bin",
+ "format": "raw-shard",
+ "nbytes": 24596480,
+ "records": [
+ {
+ "name": "model.layers.30.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.30.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.30.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 18022400
+ },
+ {
+ "name": "model.layers.31.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19660800
+ },
+ {
+ "name": "model.layers.31.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19671040
+ },
+ {
+ "name": "model.layers.31.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "3fe027d0eef01938710756052104cb1e"
+ },
+ {
+ "dataPath": "params_shard_18.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.31.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "66bebfc92e5a93f1d1c2f482b2ee9995"
+ },
+ {
+ "dataPath": "params_shard_19.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.32.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "71a96d9a1f07362cf54cb2d0c6a07781"
+ },
+ {
+ "dataPath": "params_shard_20.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.31.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.31.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9830400
+ },
+ {
+ "name": "model.layers.31.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9840640
+ },
+ {
+ "name": "model.layers.31.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 9850880
+ },
+ {
+ "name": "model.layers.31.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.31.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 27873280
+ },
+ {
+ "name": "model.layers.32.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.32.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "c550cca627bf621e946b39f39d6d071d"
+ },
+ {
+ "dataPath": "params_shard_21.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.32.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "d943f340920420e02f2f431b84a78279"
+ },
+ {
+ "dataPath": "params_shard_22.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.32.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "92f25d779811c720d33465986d78e9f9"
+ },
+ {
+ "dataPath": "params_shard_23.bin",
+ "format": "raw-shard",
+ "nbytes": 32788480,
+ "records": [
+ {
+ "name": "model.layers.32.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.32.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.32.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.32.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.32.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.32.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "73a1ad6faf21063019c00fbe1e6860ce"
+ },
+ {
+ "dataPath": "params_shard_24.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.33.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "f9cdf781669b9d2148669243fd1de04d"
+ },
+ {
+ "dataPath": "params_shard_25.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.33.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "07f20d64dc98d02a0caa09911c3042de"
+ },
+ {
+ "dataPath": "params_shard_26.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.33.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e8e80ec38f0802ea41a5b74341cad51d"
+ },
+ {
+ "dataPath": "params_shard_27.bin",
+ "format": "raw-shard",
+ "nbytes": 21340160,
+ "records": [
+ {
+ "name": "model.layers.32.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.33.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1638400
+ },
+ {
+ "name": "model.layers.33.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1648640
+ },
+ {
+ "name": "model.layers.33.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 1658880
+ },
+ {
+ "name": "model.layers.33.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 6574080
+ },
+ {
+ "name": "model.layers.33.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16404480
+ },
+ {
+ "name": "model.layers.33.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16414720
+ },
+ {
+ "name": "model.layers.33.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 16424960
+ }
+ ],
+ "md5sum": "04689894836b6de2e5e25e745d1cc928"
+ },
+ {
+ "dataPath": "params_shard_28.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.34.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "c9755cac9e4a05970cf8b4c54d798400"
+ },
+ {
+ "dataPath": "params_shard_29.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.34.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "2438cb81ed0ca39088518692cac0eb6e"
+ },
+ {
+ "dataPath": "params_shard_30.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.34.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "9b9aaabaa3dbad557ccc3725bdf0e546"
+ },
+ {
+ "dataPath": "params_shard_31.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.33.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.33.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.34.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.34.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.34.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.34.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 19681280
+ },
+ {
+ "name": "model.layers.34.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.34.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "ff7940e731cb68ea2df1fd4283b961a0"
+ },
+ {
+ "dataPath": "params_shard_32.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.35.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "97e0daa3242a279df03b41cdc2d01a6e"
+ },
+ {
+ "dataPath": "params_shard_33.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.35.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "f53e08036e38b3a50a59109bde03f20d"
+ },
+ {
+ "dataPath": "params_shard_34.bin",
+ "format": "raw-shard",
+ "nbytes": 24596480,
+ "records": [
+ {
+ "name": "model.layers.34.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.34.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.34.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 18022400
+ },
+ {
+ "name": "model.layers.35.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19660800
+ },
+ {
+ "name": "model.layers.35.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19671040
+ },
+ {
+ "name": "model.layers.35.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "921faca7e31cdc19bcd39a64f3fb75d9"
+ },
+ {
+ "dataPath": "params_shard_35.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.35.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "0b245add8688ca2cefb372ef93de811d"
+ },
+ {
+ "dataPath": "params_shard_36.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.36.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "2aa12a0661f300a0303eab548df42419"
+ },
+ {
+ "dataPath": "params_shard_37.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.35.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.35.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9830400
+ },
+ {
+ "name": "model.layers.35.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9840640
+ },
+ {
+ "name": "model.layers.35.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 9850880
+ },
+ {
+ "name": "model.layers.35.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.35.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 27873280
+ },
+ {
+ "name": "model.layers.36.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.36.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "28d727ef81e4ea9a6460ea96610a201a"
+ },
+ {
+ "dataPath": "params_shard_38.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.36.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "5eb6ed2785fb1a40ecc7ea84b8d8eda5"
+ },
+ {
+ "dataPath": "params_shard_39.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.36.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "36923e2a78d9c2112fde1e3c23d86a25"
+ },
+ {
+ "dataPath": "params_shard_40.bin",
+ "format": "raw-shard",
+ "nbytes": 32788480,
+ "records": [
+ {
+ "name": "model.layers.36.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.36.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.36.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.36.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.36.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.36.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "78e4d9d4b36c726e315452ede0e3a2bf"
+ },
+ {
+ "dataPath": "params_shard_41.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.37.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "bbee22a1fbf01e33eb6079c2aeff9489"
+ },
+ {
+ "dataPath": "params_shard_42.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.37.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "adc91bc74c4fea6c703903d28b566820"
+ },
+ {
+ "dataPath": "params_shard_43.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.37.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "8297ae53b1d5f2838c13de4aa1f6f453"
+ },
+ {
+ "dataPath": "params_shard_44.bin",
+ "format": "raw-shard",
+ "nbytes": 21340160,
+ "records": [
+ {
+ "name": "model.layers.36.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.37.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1638400
+ },
+ {
+ "name": "model.layers.37.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1648640
+ },
+ {
+ "name": "model.layers.37.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 1658880
+ },
+ {
+ "name": "model.layers.37.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 6574080
+ },
+ {
+ "name": "model.layers.37.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16404480
+ },
+ {
+ "name": "model.layers.37.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16414720
+ },
+ {
+ "name": "model.layers.37.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 16424960
+ }
+ ],
+ "md5sum": "8b673b122e1c03d8846abe30c8b5a13a"
+ },
+ {
+ "dataPath": "params_shard_45.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.38.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "2e39fbaa85fae6bbc7b7386964ebb898"
+ },
+ {
+ "dataPath": "params_shard_46.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.38.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "a40b2555465b5c9033c87f8abb8eb3bc"
+ },
+ {
+ "dataPath": "params_shard_47.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.38.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "f6e293f82643fb36447895c0b125f08d"
+ },
+ {
+ "dataPath": "params_shard_48.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.37.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.37.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.38.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.38.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.38.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.38.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 19681280
+ },
+ {
+ "name": "model.layers.38.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.38.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "ce930ff5841ab91de2d5628f2e2e20c8"
+ },
+ {
+ "dataPath": "params_shard_49.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.39.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "6839c5af229cce80221fd3ac9d5e3980"
+ },
+ {
+ "dataPath": "params_shard_50.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.39.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "d2d272cc333436e03047a9e48167dcd8"
+ },
+ {
+ "dataPath": "params_shard_51.bin",
+ "format": "raw-shard",
+ "nbytes": 24596480,
+ "records": [
+ {
+ "name": "model.layers.38.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.38.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.38.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 18022400
+ },
+ {
+ "name": "model.layers.39.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19660800
+ },
+ {
+ "name": "model.layers.39.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19671040
+ },
+ {
+ "name": "model.layers.39.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "5b46eb1f78894bf12d65b1620538ef93"
+ },
+ {
+ "dataPath": "params_shard_52.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.39.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "f0ff10ef5b72dbd38c4e06a50d41495e"
+ },
+ {
+ "dataPath": "params_shard_53.bin",
+ "format": "raw-shard",
+ "nbytes": 216596480,
+ "records": [
+ {
+ "name": "model.embed_tokens.q_weight",
+ "shape": [
+ 84608,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 216596480,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "b12b12d475642b24aa69fe7f811145ca"
+ },
+ {
+ "dataPath": "params_shard_54.bin",
+ "format": "raw-shard",
+ "nbytes": 27074560,
+ "records": [
+ {
+ "name": "model.embed_tokens.q_scale",
+ "shape": [
+ 84608,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 27074560,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "d3aedbdc1b3affb80b03a3edbebcfcfd"
+ },
+ {
+ "dataPath": "params_shard_55.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "58676650c2a0177eb91791e2e3f3f662"
+ },
+ {
+ "dataPath": "params_shard_56.bin",
+ "format": "raw-shard",
+ "nbytes": 29552640,
+ "records": [
+ {
+ "name": "model.layers.39.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.39.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9830400
+ },
+ {
+ "name": "model.layers.39.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9840640
+ },
+ {
+ "name": "model.layers.39.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 9850880
+ },
+ {
+ "name": "model.layers.39.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.39.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 27873280
+ },
+ {
+ "name": "model.norm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.norm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ },
+ {
+ "name": "model.layers.0.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29532160
+ },
+ {
+ "name": "model.layers.0.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29542400
+ }
+ ],
+ "md5sum": "8fa9999f9085ddfe0487829507329eae"
+ },
+ {
+ "dataPath": "params_shard_57.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "696e4999f68069b1773a4b634476eb43"
+ },
+ {
+ "dataPath": "params_shard_58.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "28a98790a89cea69405eaa61a097d8d5"
+ },
+ {
+ "dataPath": "params_shard_59.bin",
+ "format": "raw-shard",
+ "nbytes": 32788480,
+ "records": [
+ {
+ "name": "model.layers.0.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.0.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.0.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "21d046e1ac4c156ebfaac77193940e6e"
+ },
+ {
+ "dataPath": "params_shard_60.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.1.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "a0f2b864212a0eae754053a85e8fd887"
+ },
+ {
+ "dataPath": "params_shard_61.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "c01a91581f716055257b5eea881d88d2"
+ },
+ {
+ "dataPath": "params_shard_62.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "27edefe8de27d0fb88687adeb9875d6a"
+ },
+ {
+ "dataPath": "params_shard_63.bin",
+ "format": "raw-shard",
+ "nbytes": 21340160,
+ "records": [
+ {
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.1.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1638400
+ },
+ {
+ "name": "model.layers.1.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1648640
+ },
+ {
+ "name": "model.layers.1.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 1658880
+ },
+ {
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 6574080
+ },
+ {
+ "name": "model.layers.1.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16404480
+ },
+ {
+ "name": "model.layers.1.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16414720
+ },
+ {
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 16424960
+ }
+ ],
+ "md5sum": "3e42ee42193e2e0f52be9705255fd473"
+ },
+ {
+ "dataPath": "params_shard_64.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.10.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "3b92dea68a4706d74ba07c0cdb8303cd"
+ },
+ {
+ "dataPath": "params_shard_65.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.10.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "c0edf6488edb74dc00056c9646a30ccc"
+ },
+ {
+ "dataPath": "params_shard_66.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.10.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "5d745b6f2194dda7562159e534ad7ed8"
+ },
+ {
+ "dataPath": "params_shard_67.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.10.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.10.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.10.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.10.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 19681280
+ },
+ {
+ "name": "model.layers.10.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.10.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "035ced7a8a755d1779cfcbebf2b5665e"
+ },
+ {
+ "dataPath": "params_shard_68.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.11.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "6061241ba846fc109777435508196a5d"
+ },
+ {
+ "dataPath": "params_shard_69.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.11.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "c74377d7b1dc83e40ecfce25e8565d6b"
+ },
+ {
+ "dataPath": "params_shard_70.bin",
+ "format": "raw-shard",
+ "nbytes": 24596480,
+ "records": [
+ {
+ "name": "model.layers.10.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.10.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.10.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 18022400
+ },
+ {
+ "name": "model.layers.11.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19660800
+ },
+ {
+ "name": "model.layers.11.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19671040
+ },
+ {
+ "name": "model.layers.11.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "d028442b84182786570256a1bb75df29"
+ },
+ {
+ "dataPath": "params_shard_71.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.11.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "3db854761d7988815ed0b5b19c08a676"
+ },
+ {
+ "dataPath": "params_shard_72.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.12.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "06d8f8e18222a17e994824245810ec2a"
+ },
+ {
+ "dataPath": "params_shard_73.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.11.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.11.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9830400
+ },
+ {
+ "name": "model.layers.11.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9840640
+ },
+ {
+ "name": "model.layers.11.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 9850880
+ },
+ {
+ "name": "model.layers.11.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.11.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 27873280
+ },
+ {
+ "name": "model.layers.12.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.12.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "44dc1c5f2f6d6ca0db072b1a9156386a"
+ },
+ {
+ "dataPath": "params_shard_74.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.12.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "b0d598c6d5caf3ffea3da4041ca9c4fd"
+ },
+ {
+ "dataPath": "params_shard_75.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.12.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "c2d139c6799c020eaf446cfb2c468f8c"
+ },
+ {
+ "dataPath": "params_shard_76.bin",
+ "format": "raw-shard",
+ "nbytes": 32788480,
+ "records": [
+ {
+ "name": "model.layers.12.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.12.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.12.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.12.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.12.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.12.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "21e085295e2fb3d0716e023ff72d5073"
+ },
+ {
+ "dataPath": "params_shard_77.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.13.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "7b04b723d0d4a5eb6ffeb69dc722001a"
+ },
+ {
+ "dataPath": "params_shard_78.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.2.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "cc75a9359a950b46070babc5071be060"
+ },
+ {
+ "dataPath": "params_shard_79.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "055e33be35d2d15ec39cc1c06f2c7684"
+ },
+ {
+ "dataPath": "params_shard_80.bin",
+ "format": "raw-shard",
+ "nbytes": 26234880,
+ "records": [
+ {
+ "name": "model.layers.12.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.13.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 1638400
+ },
+ {
+ "name": "model.layers.13.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 6553600
+ },
+ {
+ "name": "model.layers.13.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 19660800
+ },
+ {
+ "name": "model.layers.2.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 21299200
+ },
+ {
+ "name": "model.layers.2.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 21309440
+ },
+ {
+ "name": "model.layers.2.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 21319680
+ }
+ ],
+ "md5sum": "bf228d3c6b1b13be2de45dd45ab84f9c"
+ },
+ {
+ "dataPath": "params_shard_81.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "6d6b0800daabfcf6de2e48fbe89a76a8"
+ },
+ {
+ "dataPath": "params_shard_82.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.3.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "953f828afbfacb042403a1c0e281f313"
+ },
+ {
+ "dataPath": "params_shard_83.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.2.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9830400
+ },
+ {
+ "name": "model.layers.2.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9840640
+ },
+ {
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 9850880
+ },
+ {
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 27873280
+ },
+ {
+ "name": "model.layers.3.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.3.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "c73f26b86361722146e1fd2858e8ac63"
+ },
+ {
+ "dataPath": "params_shard_84.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e3bfd9c1b03734e1a3a3adc6a79caf27"
+ },
+ {
+ "dataPath": "params_shard_85.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "053298a4f67158a12977323376042b8f"
+ },
+ {
+ "dataPath": "params_shard_86.bin",
+ "format": "raw-shard",
+ "nbytes": 32788480,
+ "records": [
+ {
+ "name": "model.layers.3.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.3.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.3.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "8dcf4c6fc615c26ca132c4c8ef8e35ef"
+ },
+ {
+ "dataPath": "params_shard_87.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.4.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "74114f024b5d455a25cadb6c0b64e45b"
+ },
+ {
+ "dataPath": "params_shard_88.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "04fe95b9e6179d9ea6328dbd514fbdc7"
+ },
+ {
+ "dataPath": "params_shard_89.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "10a720cf64ec3849a93fdf092b432918"
+ },
+ {
+ "dataPath": "params_shard_90.bin",
+ "format": "raw-shard",
+ "nbytes": 21340160,
+ "records": [
+ {
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.4.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1638400
+ },
+ {
+ "name": "model.layers.4.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1648640
+ },
+ {
+ "name": "model.layers.4.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 1658880
+ },
+ {
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 6574080
+ },
+ {
+ "name": "model.layers.4.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16404480
+ },
+ {
+ "name": "model.layers.4.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16414720
+ },
+ {
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 16424960
+ }
+ ],
+ "md5sum": "7e695a959754083c594e86c7cb655f3c"
+ },
+ {
+ "dataPath": "params_shard_91.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.5.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "6ed0f1f5ff41a28d1fef8d801f33476a"
+ },
+ {
+ "dataPath": "params_shard_92.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "98b82dd73af0021f80a78e40b639a840"
+ },
+ {
+ "dataPath": "params_shard_93.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "1a6f23350bbeda005e74c780c0274e52"
+ },
+ {
+ "dataPath": "params_shard_94.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.5.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.5.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.5.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 19681280
+ },
+ {
+ "name": "model.layers.5.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.5.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "d1ee36f6ba4cd8d38e59c4abe5a2fc0c"
+ },
+ {
+ "dataPath": "params_shard_95.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.6.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "012c3aa084a9b3a7366cc19cd26d0c34"
+ },
+ {
+ "dataPath": "params_shard_96.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.6.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "ac1e3c90fb1d90d007827891575cc659"
+ },
+ {
+ "dataPath": "params_shard_97.bin",
+ "format": "raw-shard",
+ "nbytes": 24596480,
+ "records": [
+ {
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 18022400
+ },
+ {
+ "name": "model.layers.6.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19660800
+ },
+ {
+ "name": "model.layers.6.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19671040
+ },
+ {
+ "name": "model.layers.6.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "2ed276e374e3580d9b7bb4a23c0f46ad"
+ },
+ {
+ "dataPath": "params_shard_98.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.6.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "4cc297258f87815f9af3a4abd60f9b2b"
+ },
+ {
+ "dataPath": "params_shard_99.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.7.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "6d7bf648c1ca8584863d4a0f0019d9da"
+ },
+ {
+ "dataPath": "params_shard_100.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.6.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.6.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9830400
+ },
+ {
+ "name": "model.layers.6.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9840640
+ },
+ {
+ "name": "model.layers.6.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 9850880
+ },
+ {
+ "name": "model.layers.6.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.6.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 27873280
+ },
+ {
+ "name": "model.layers.7.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.7.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "09bdd7e7b420e62c9d5672087e88a4e1"
+ },
+ {
+ "dataPath": "params_shard_101.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.7.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "867f82ef6eca182144a09b6d053d13de"
+ },
+ {
+ "dataPath": "params_shard_102.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.7.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e88c492ec7cf1881ad5c8563b362b795"
+ },
+ {
+ "dataPath": "params_shard_103.bin",
+ "format": "raw-shard",
+ "nbytes": 32788480,
+ "records": [
+ {
+ "name": "model.layers.7.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.7.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.7.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.7.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.7.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.7.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "a739b2696d01a7329e421a50b94813f1"
+ },
+ {
+ "dataPath": "params_shard_104.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.8.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "070454efb31c28e8eacd50f917a4aa99"
+ },
+ {
+ "dataPath": "params_shard_105.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.8.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "a39fdf6b59c55dc1fac36eb769b1b606"
+ },
+ {
+ "dataPath": "params_shard_106.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.8.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "1222b913575c5f7afcfd05fe0d15c4e4"
+ },
+ {
+ "dataPath": "params_shard_107.bin",
+ "format": "raw-shard",
+ "nbytes": 21340160,
+ "records": [
+ {
+ "name": "model.layers.7.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.8.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1638400
+ },
+ {
+ "name": "model.layers.8.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1648640
+ },
+ {
+ "name": "model.layers.8.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 1658880
+ },
+ {
+ "name": "model.layers.8.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 6574080
+ },
+ {
+ "name": "model.layers.8.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16404480
+ },
+ {
+ "name": "model.layers.8.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16414720
+ },
+ {
+ "name": "model.layers.8.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 16424960
+ }
+ ],
+ "md5sum": "f95a110a56a9d9c0867d62740d91606e"
+ },
+ {
+ "dataPath": "params_shard_108.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.9.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "ecc2ce72ff58f04655c0ed66921e8592"
+ },
+ {
+ "dataPath": "params_shard_109.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.9.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "b5b490149dcf50b751db0aec1c8c37bc"
+ },
+ {
+ "dataPath": "params_shard_110.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.9.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "390d0db0775f566b0a192b183e736254"
+ },
+ {
+ "dataPath": "params_shard_111.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.8.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.8.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.9.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.9.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.9.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.9.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 19681280
+ },
+ {
+ "name": "model.layers.9.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.9.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "5508c4901461c8d993789b33efb26d18"
+ },
+ {
+ "dataPath": "params_shard_112.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.13.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "766ce67be407c8bc6f83f0807e45d927"
+ },
+ {
+ "dataPath": "params_shard_113.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.13.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "a365b6a4700919d571a45e4bf70f5fde"
+ },
+ {
+ "dataPath": "params_shard_114.bin",
+ "format": "raw-shard",
+ "nbytes": 24596480,
+ "records": [
+ {
+ "name": "model.layers.9.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.9.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.9.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 18022400
+ },
+ {
+ "name": "model.layers.13.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19660800
+ },
+ {
+ "name": "model.layers.13.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19671040
+ },
+ {
+ "name": "model.layers.13.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "2bc1ceeec54d6a058531b00de70e78d6"
+ },
+ {
+ "dataPath": "params_shard_115.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.14.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "549a826fdf8a6c68d1e423a729ce6424"
+ },
+ {
+ "dataPath": "params_shard_116.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.14.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "ad5944451273a8cbec47ee2eda09d3ff"
+ },
+ {
+ "dataPath": "params_shard_117.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.14.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "ac479bb0491af3f0774a1665408e5022"
+ },
+ {
+ "dataPath": "params_shard_118.bin",
+ "format": "raw-shard",
+ "nbytes": 29552640,
+ "records": [
+ {
+ "name": "model.layers.13.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.13.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9830400
+ },
+ {
+ "name": "model.layers.13.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9840640
+ },
+ {
+ "name": "model.layers.14.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9850880
+ },
+ {
+ "name": "model.layers.14.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9861120
+ },
+ {
+ "name": "model.layers.14.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 9871360
+ },
+ {
+ "name": "model.layers.14.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 14786560
+ },
+ {
+ "name": "model.layers.14.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 24616960
+ },
+ {
+ "name": "model.layers.14.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 24627200
+ },
+ {
+ "name": "model.layers.14.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 24637440
+ }
+ ],
+ "md5sum": "795a8143a20c137aa07c65ffe14b71d2"
+ },
+ {
+ "dataPath": "params_shard_119.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.15.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "9142e05f652dae5e2c2372ffec89e5d8"
+ },
+ {
+ "dataPath": "params_shard_120.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.15.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "74d20534899606e17066e8ff29062324"
+ },
+ {
+ "dataPath": "params_shard_121.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.15.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "11c8740433b8c1f51cb409e13915d0c5"
+ },
+ {
+ "dataPath": "params_shard_122.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.14.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.14.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.15.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.15.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.15.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.15.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 19681280
+ },
+ {
+ "name": "model.layers.15.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.15.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "30989a24c70060ce9076f35acb84f55a"
+ },
+ {
+ "dataPath": "params_shard_123.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.16.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "ca8eebe004bcfd4fd25b554a83a958cc"
+ },
+ {
+ "dataPath": "params_shard_124.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.16.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "327f8c0008b53ba83be8381fffdcfab6"
+ },
+ {
+ "dataPath": "params_shard_125.bin",
+ "format": "raw-shard",
+ "nbytes": 24596480,
+ "records": [
+ {
+ "name": "model.layers.15.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.15.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.15.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 18022400
+ },
+ {
+ "name": "model.layers.16.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19660800
+ },
+ {
+ "name": "model.layers.16.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19671040
+ },
+ {
+ "name": "model.layers.16.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "15d5eb568482fe144697c8268515da06"
+ },
+ {
+ "dataPath": "params_shard_126.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.16.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "48352aa580a98be6e314514c6c1647e9"
+ },
+ {
+ "dataPath": "params_shard_127.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.17.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e389cec5d78ad7691b07acf98a3b1fad"
+ },
+ {
+ "dataPath": "params_shard_128.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.16.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.16.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9830400
+ },
+ {
+ "name": "model.layers.16.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9840640
+ },
+ {
+ "name": "model.layers.16.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 9850880
+ },
+ {
+ "name": "model.layers.16.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.16.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 27873280
+ },
+ {
+ "name": "model.layers.17.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.17.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "96a8afd7946c2eeef876baa6e6a900bb"
+ },
+ {
+ "dataPath": "params_shard_129.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.17.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "90a188eb7f06b55f07ba4258bb121ba6"
+ },
+ {
+ "dataPath": "params_shard_130.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.17.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "8433d5b5f498748a5ce9168c9eb798fa"
+ },
+ {
+ "dataPath": "params_shard_131.bin",
+ "format": "raw-shard",
+ "nbytes": 32788480,
+ "records": [
+ {
+ "name": "model.layers.17.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.17.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.17.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.17.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.17.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.17.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "88adde5dc7c74c200cc8c9ddabf819df"
+ },
+ {
+ "dataPath": "params_shard_132.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.18.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "f9397e1c4d4638f612825cacfa2a3bbd"
+ },
+ {
+ "dataPath": "params_shard_133.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.18.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e6c662ca2784224f22ccb1f69e01d917"
+ },
+ {
+ "dataPath": "params_shard_134.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.18.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "ef8fc32c12744927409c1c59a3c7d11d"
+ },
+ {
+ "dataPath": "params_shard_135.bin",
+ "format": "raw-shard",
+ "nbytes": 21340160,
+ "records": [
+ {
+ "name": "model.layers.17.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.18.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1638400
+ },
+ {
+ "name": "model.layers.18.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1648640
+ },
+ {
+ "name": "model.layers.18.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 1658880
+ },
+ {
+ "name": "model.layers.18.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 6574080
+ },
+ {
+ "name": "model.layers.18.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16404480
+ },
+ {
+ "name": "model.layers.18.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16414720
+ },
+ {
+ "name": "model.layers.18.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 16424960
+ }
+ ],
+ "md5sum": "8dd25925187de336ea714e2598863acd"
+ },
+ {
+ "dataPath": "params_shard_136.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.19.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "79a568ac1b181f91a3b614c0df148420"
+ },
+ {
+ "dataPath": "params_shard_137.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.19.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "585513860db114ce21936bddde7f79f8"
+ },
+ {
+ "dataPath": "params_shard_138.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.19.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "1b3fe359a919caf696d74d060c7e226b"
+ },
+ {
+ "dataPath": "params_shard_139.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.18.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.18.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.19.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.19.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.19.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.19.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 19681280
+ },
+ {
+ "name": "model.layers.19.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.19.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "6acc007bfd202ee760a4840b071da2ea"
+ },
+ {
+ "dataPath": "params_shard_140.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.20.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "a48eb501044d82244e418a883d6e9f3a"
+ },
+ {
+ "dataPath": "params_shard_141.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.20.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "f79dddc2b1bbcdf5ba97fd240d75efaf"
+ },
+ {
+ "dataPath": "params_shard_142.bin",
+ "format": "raw-shard",
+ "nbytes": 24596480,
+ "records": [
+ {
+ "name": "model.layers.19.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.19.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.19.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 18022400
+ },
+ {
+ "name": "model.layers.20.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19660800
+ },
+ {
+ "name": "model.layers.20.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19671040
+ },
+ {
+ "name": "model.layers.20.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "b801c76ca26df2facd140de89e819cac"
+ },
+ {
+ "dataPath": "params_shard_143.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.20.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e38d59f4d6b478220bac645a0f8344bd"
+ },
+ {
+ "dataPath": "params_shard_144.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.21.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "fb8ac52ef36643891a926a1edcc9c38f"
+ },
+ {
+ "dataPath": "params_shard_145.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.20.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.20.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9830400
+ },
+ {
+ "name": "model.layers.20.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9840640
+ },
+ {
+ "name": "model.layers.20.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 9850880
+ },
+ {
+ "name": "model.layers.20.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.20.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 27873280
+ },
+ {
+ "name": "model.layers.21.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.21.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "672daa7a530b35036cc1ee32ce4d3e52"
+ },
+ {
+ "dataPath": "params_shard_146.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.21.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "1736324e2476c0cc5754b65860fb6a17"
+ },
+ {
+ "dataPath": "params_shard_147.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.21.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "a98197b4c802e71191efb2a2a8ce4a9a"
+ },
+ {
+ "dataPath": "params_shard_148.bin",
+ "format": "raw-shard",
+ "nbytes": 32788480,
+ "records": [
+ {
+ "name": "model.layers.21.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.21.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.21.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.21.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.21.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.21.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "562091cb4a7a092dd8aecda7cc5942a2"
+ },
+ {
+ "dataPath": "params_shard_149.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.22.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "db3b9464484996caf94efca7949856a9"
+ },
+ {
+ "dataPath": "params_shard_150.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.22.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "0cf457e343dbbe2983513919621d7e3d"
+ },
+ {
+ "dataPath": "params_shard_151.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.22.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "9b41718d7fea52a31023f71d92ad7e7e"
+ },
+ {
+ "dataPath": "params_shard_152.bin",
+ "format": "raw-shard",
+ "nbytes": 21340160,
+ "records": [
+ {
+ "name": "model.layers.21.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.22.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1638400
+ },
+ {
+ "name": "model.layers.22.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1648640
+ },
+ {
+ "name": "model.layers.22.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 1658880
+ },
+ {
+ "name": "model.layers.22.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 6574080
+ },
+ {
+ "name": "model.layers.22.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16404480
+ },
+ {
+ "name": "model.layers.22.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16414720
+ },
+ {
+ "name": "model.layers.22.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 16424960
+ }
+ ],
+ "md5sum": "2e1a20e8e0bf96026dd81a3d8723387c"
+ },
+ {
+ "dataPath": "params_shard_153.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.23.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "fd34b952e367fad5a96f24bf3befbc9e"
+ },
+ {
+ "dataPath": "params_shard_154.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.23.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "37a59c0a136a49447665631b2d484818"
+ },
+ {
+ "dataPath": "params_shard_155.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.23.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "3af32e1e1141cbbb5d8b6d19b5cef4b4"
+ },
+ {
+ "dataPath": "params_shard_156.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.22.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.22.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.23.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.23.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.23.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.23.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 19681280
+ },
+ {
+ "name": "model.layers.23.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.23.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "60c9e24914356ed670da8b74f2cc9c85"
+ },
+ {
+ "dataPath": "params_shard_157.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.24.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "b998f9ed7904fd7b61df322f38af4d01"
+ },
+ {
+ "dataPath": "params_shard_158.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.24.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "f991e611553d1a271a10ed8eefd86caa"
+ },
+ {
+ "dataPath": "params_shard_159.bin",
+ "format": "raw-shard",
+ "nbytes": 24596480,
+ "records": [
+ {
+ "name": "model.layers.23.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.23.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.23.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 18022400
+ },
+ {
+ "name": "model.layers.24.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19660800
+ },
+ {
+ "name": "model.layers.24.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 19671040
+ },
+ {
+ "name": "model.layers.24.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "1919b77c2f6ffcb60f1d434e713c27c5"
+ },
+ {
+ "dataPath": "params_shard_160.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.24.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "9df12e7a0b9114236dd9a2839fbdeae3"
+ },
+ {
+ "dataPath": "params_shard_161.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.25.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "2aee3c096732f8fc551e18f99113b52b"
+ },
+ {
+ "dataPath": "params_shard_162.bin",
+ "format": "raw-shard",
+ "nbytes": 29532160,
+ "records": [
+ {
+ "name": "model.layers.24.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.24.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9830400
+ },
+ {
+ "name": "model.layers.24.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 9840640
+ },
+ {
+ "name": "model.layers.24.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 9850880
+ },
+ {
+ "name": "model.layers.24.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.24.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 27873280
+ },
+ {
+ "name": "model.layers.25.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29511680
+ },
+ {
+ "name": "model.layers.25.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 29521920
+ }
+ ],
+ "md5sum": "df360b1be6c2aa4071b3f393d786fcd1"
+ },
+ {
+ "dataPath": "params_shard_163.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.25.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "70858c47d4874487841c716581c4ed73"
+ },
+ {
+ "dataPath": "params_shard_164.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.25.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "eec1828d499534b7c11c28643e5fa9ad"
+ },
+ {
+ "dataPath": "params_shard_165.bin",
+ "format": "raw-shard",
+ "nbytes": 32788480,
+ "records": [
+ {
+ "name": "model.layers.25.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.25.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 4915200
+ },
+ {
+ "name": "model.layers.25.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.25.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 14755840
+ },
+ {
+ "name": "model.layers.25.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 14766080
+ },
+ {
+ "name": "model.layers.25.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 19681280
+ }
+ ],
+ "md5sum": "98a328c294d6e0e2ab64378454a8f8e3"
+ },
+ {
+ "dataPath": "params_shard_166.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.26.mlp.down_proj.q_weight",
+ "shape": [
+ 5120,
+ 1920
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "f3b106840d319d282775280f05c7da95"
+ },
+ {
+ "dataPath": "params_shard_167.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.26.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "1c802585e213ae828479e3ba42f04a07"
+ },
+ {
+ "dataPath": "params_shard_168.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.26.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "24ddd02fae8d864755722e4a3c397ff2"
+ },
+ {
+ "dataPath": "params_shard_169.bin",
+ "format": "raw-shard",
+ "nbytes": 21340160,
+ "records": [
+ {
+ "name": "model.layers.25.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.26.input_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1638400
+ },
+ {
+ "name": "model.layers.26.input_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 1648640
+ },
+ {
+ "name": "model.layers.26.mlp.down_proj.q_scale",
+ "shape": [
+ 5120,
+ 480
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 1658880
+ },
+ {
+ "name": "model.layers.26.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 6574080
+ },
+ {
+ "name": "model.layers.26.post_attention_layernorm.bias",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16404480
+ },
+ {
+ "name": "model.layers.26.post_attention_layernorm.weight",
+ "shape": [
+ 5120
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 10240,
+ "byteOffset": 16414720
+ },
+ {
+ "name": "model.layers.26.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 16424960
+ }
+ ],
+ "md5sum": "766966000253158ce0de1fc327083c40"
+ },
+ {
+ "dataPath": "params_shard_170.bin",
+ "format": "raw-shard",
+ "nbytes": 78643200,
+ "records": [
+ {
+ "name": "model.layers.27.mlp.gate_up_proj.q_weight",
+ "shape": [
+ 30720,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 78643200,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "7a23e189ebf5d3a69d85d32d1d09b9dc"
+ },
+ {
+ "dataPath": "params_shard_171.bin",
+ "format": "raw-shard",
+ "nbytes": 39321600,
+ "records": [
+ {
+ "name": "model.layers.27.self_attn.qkv_proj.q_weight",
+ "shape": [
+ 15360,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 39321600,
+ "byteOffset": 0
+ }
+ ],
+ "md5sum": "e19131109e5e6f435f9cb381610be6ea"
+ },
+ {
+ "dataPath": "params_shard_172.bin",
+ "format": "raw-shard",
+ "nbytes": 29491200,
+ "records": [
+ {
+ "name": "model.layers.26.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.26.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 13107200
+ },
+ {
+ "name": "model.layers.27.mlp.gate_up_proj.q_scale",
+ "shape": [
+ 30720,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 9830400,
+ "byteOffset": 14745600
+ },
+ {
+ "name": "model.layers.27.self_attn.qkv_proj.q_scale",
+ "shape": [
+ 15360,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 4915200,
+ "byteOffset": 24576000
+ }
+ ],
+ "md5sum": "e7bd7a7218b8efd7c48da852014c4530"
+ },
+ {
+ "dataPath": "params_shard_173.bin",
+ "format": "raw-shard",
+ "nbytes": 14745600,
+ "records": [
+ {
+ "name": "model.layers.27.self_attn.o_proj.q_weight",
+ "shape": [
+ 5120,
+ 640
+ ],
+ "dtype": "uint32",
+ "format": "f32-to-bf16",
+ "nbytes": 13107200,
+ "byteOffset": 0
+ },
+ {
+ "name": "model.layers.27.self_attn.o_proj.q_scale",
+ "shape": [
+ 5120,
+ 160
+ ],
+ "dtype": "float16",
+ "format": "f32-to-bf16",
+ "nbytes": 1638400,
+ "byteOffset": 13107200
+ }
+ ],
+ "md5sum": "e701a65564dd6cc1084451109859259c"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/params_shard_0.bin b/params_shard_0.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a42fa6d06d2315158da1e022c09b09f604c4b7d6
--- /dev/null
+++ b/params_shard_0.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c6ebc9d3183afefd9ef89efa7d879e7cb311aac9dbd83675fc1354376ed8a96
+size 216596480
diff --git a/params_shard_1.bin b/params_shard_1.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a091d1e61a7cec36f740a53fda5b7a6c6ba46df4
--- /dev/null
+++ b/params_shard_1.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0325e4412c5581849d9c3adaf44b79024c0f49033163e5c6e4992c9abcb8b66e
+size 39321600
diff --git a/params_shard_10.bin b/params_shard_10.bin
new file mode 100644
index 0000000000000000000000000000000000000000..89c49f73c014ffd81d57ae176ef8cfe236b77802
--- /dev/null
+++ b/params_shard_10.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:604f025156c65a16894f667977db939c429267207c2bd04700c55d5ce1ee903b
+size 21340160
diff --git a/params_shard_100.bin b/params_shard_100.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8f6823888d2cb2c204db23f1c705df11bda78f61
--- /dev/null
+++ b/params_shard_100.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:010e21cf6b0281a88e09af5d61dc4e95e67cc97ed7a64502480eececfd3caef4
+size 29532160
diff --git a/params_shard_101.bin b/params_shard_101.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9c3e87eb3c4a299244802b230775ddecc68a48a5
--- /dev/null
+++ b/params_shard_101.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d0a9f914973a72137ae2c14a45a4071d675e751c4cd478130da3c690e5d71be
+size 78643200
diff --git a/params_shard_102.bin b/params_shard_102.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4664ea8b293295e648e50af84be4f29ae95542ce
--- /dev/null
+++ b/params_shard_102.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a65f8a18f37e8b10018368add2471666d836ded1741cf895ce78814d97558a8
+size 39321600
diff --git a/params_shard_103.bin b/params_shard_103.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0f5085c8bbe86c8f8e1181563d2d3b184550a815
--- /dev/null
+++ b/params_shard_103.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7796cebe75beffdc50d9534cbff62655b08a55cba52538df1dd2599b815b1657
+size 32788480
diff --git a/params_shard_104.bin b/params_shard_104.bin
new file mode 100644
index 0000000000000000000000000000000000000000..54fc77d754853b6ebcb099b95dbaf8494fc3b2c7
--- /dev/null
+++ b/params_shard_104.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3e72aab7978b10c28ec6e9b2122df616b2fff06bac0f4596ce6c012e612ea0bb
+size 39321600
diff --git a/params_shard_105.bin b/params_shard_105.bin
new file mode 100644
index 0000000000000000000000000000000000000000..dbcc5518eef39b7eccd4d4e6f8c05b24f53a4f98
--- /dev/null
+++ b/params_shard_105.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afdf19d23dbb0afcf55d8c68a44b7fc9c7e50457dc3c10b39915a7e6022531fd
+size 78643200
diff --git a/params_shard_106.bin b/params_shard_106.bin
new file mode 100644
index 0000000000000000000000000000000000000000..015feb4eaaaeff9a3dff3f28024fe86fff3b47d4
--- /dev/null
+++ b/params_shard_106.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df9a7191d491b3161c1d7e3f58f1254657f4fdab537986295a43a7707e410dd3
+size 39321600
diff --git a/params_shard_107.bin b/params_shard_107.bin
new file mode 100644
index 0000000000000000000000000000000000000000..18f8f0577f73e03342f1464d802cfe2ddc8e87b9
--- /dev/null
+++ b/params_shard_107.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:778b035e4a3253c29fa1cd4aa1f52b56199dd68931b35e02c4d797904d48628c
+size 21340160
diff --git a/params_shard_108.bin b/params_shard_108.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ec719c19820a4d4abbce47d78d16bdcf9eb8e2a4
--- /dev/null
+++ b/params_shard_108.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8b286f6912f64652b69dc5079866225429b2d6166ac4273d044b2eea225520c
+size 39321600
diff --git a/params_shard_109.bin b/params_shard_109.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5a13b382967a13014456db1ed3c85e49016f8f4f
--- /dev/null
+++ b/params_shard_109.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9fb613a3a431012d3d2a647075da76a00f47b48edf6b34bc7514f3dd016e3f8
+size 78643200
diff --git a/params_shard_11.bin b/params_shard_11.bin
new file mode 100644
index 0000000000000000000000000000000000000000..aed660ae07e342ab548df205c0185076a2eab43d
--- /dev/null
+++ b/params_shard_11.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f293648b52e58de4d29b1f7520390ead4eac0370b4a16837c20999d95c17016
+size 39321600
diff --git a/params_shard_110.bin b/params_shard_110.bin
new file mode 100644
index 0000000000000000000000000000000000000000..80dfe2cf65531f75ece326b1431b8146b79dba51
--- /dev/null
+++ b/params_shard_110.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:885aac671917b0e2fdc376302106a361337f55139c8be3d0d22b4722273eb8b9
+size 39321600
diff --git a/params_shard_111.bin b/params_shard_111.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5e71a7cd485aec065a148c7b7add9f9c1925e585
--- /dev/null
+++ b/params_shard_111.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b1e4b0d6160c07afef765228ed563bc12d34fd73d7f98cab58c593e5941b26f
+size 29532160
diff --git a/params_shard_112.bin b/params_shard_112.bin
new file mode 100644
index 0000000000000000000000000000000000000000..165c5600e60fa26ef541eeca1d1ad21bec27a229
--- /dev/null
+++ b/params_shard_112.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b66070d0a932f8f9fb2a4a15ee6945a99bacc3cd8bc5d896b2ff9742721933b2
+size 39321600
diff --git a/params_shard_113.bin b/params_shard_113.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ba51c2fba8f41779f3100e0619bbd219ac7e9ba8
--- /dev/null
+++ b/params_shard_113.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4c622b7cdfb48b106ab7a174e093dd0d1a07b659bf24fdfcfdc9a95dd546bda
+size 78643200
diff --git a/params_shard_114.bin b/params_shard_114.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2c968824a607e3e183c4929dac7f03e6fc0d2e41
--- /dev/null
+++ b/params_shard_114.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d3d5bc19295cce5140660ab3e6a01128327a5297255b271e625251355f9b844
+size 24596480
diff --git a/params_shard_115.bin b/params_shard_115.bin
new file mode 100644
index 0000000000000000000000000000000000000000..423fe6bdfc05edb8a12a8f3aa76bf681b8442012
--- /dev/null
+++ b/params_shard_115.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4df98d111db74c2261fb357890c9601a69d6aef3925fb5658a8f9e924282b117
+size 39321600
diff --git a/params_shard_116.bin b/params_shard_116.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4133bfb0b6620b7494adbb0c96a5d475c1510887
--- /dev/null
+++ b/params_shard_116.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a624c60f3869b18652988eb9b42abb00e2de58b7c10b8e5a26e32924802c2c93
+size 78643200
diff --git a/params_shard_117.bin b/params_shard_117.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9f70f0ca57815aace01bdd9257e2566c92f2c133
--- /dev/null
+++ b/params_shard_117.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e742d7563bfcffb87cb9a992e8fafaa23d1c26edaf45d03ec2f39509686875e
+size 39321600
diff --git a/params_shard_118.bin b/params_shard_118.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bb362f1a050c8251c3d1b83130a6ba2cae9c9a1e
--- /dev/null
+++ b/params_shard_118.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a36db681f76eaff157575e67f2c539abc80035772093cc68771f605ecea2998e
+size 29552640
diff --git a/params_shard_119.bin b/params_shard_119.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3d0e38db37d483f5d41913e097d9fbd63536e4ef
--- /dev/null
+++ b/params_shard_119.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e3b76d3f5f7d8d5bea22d954b8c5f5d4ae4028dbccb78c817fa326db6e75021
+size 39321600
diff --git a/params_shard_12.bin b/params_shard_12.bin
new file mode 100644
index 0000000000000000000000000000000000000000..983e00a9dda5e675fbbad00029d9c9c0e6a1dd55
--- /dev/null
+++ b/params_shard_12.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b8f456e0f70313ad1a3355a2e68465c5800c7d6d1921dcbfa5b94971f447047
+size 78643200
diff --git a/params_shard_120.bin b/params_shard_120.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5472beb272fbbf7cd7d8bbc67458a7984e14b8c0
--- /dev/null
+++ b/params_shard_120.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f93892fbd23fe73270e3f03e97a7ea2f8550dbb4c6b11f1b3d89667eb01ab8c
+size 78643200
diff --git a/params_shard_121.bin b/params_shard_121.bin
new file mode 100644
index 0000000000000000000000000000000000000000..42cd7ba6a9cc2c211b7d8fd8469e3bfd2b111dd1
--- /dev/null
+++ b/params_shard_121.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:984ba37e70f7ee312c4e294168ecbd0ee8de8320b1f97533fbc3ab1342bb0f36
+size 39321600
diff --git a/params_shard_122.bin b/params_shard_122.bin
new file mode 100644
index 0000000000000000000000000000000000000000..714c9985103e2bdde8f2c84ce856aa32d4178a2a
--- /dev/null
+++ b/params_shard_122.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06d66283c533bf4459c4118c3730b5cc8d23ebb7cf12b1be82401c0f886ed2fd
+size 29532160
diff --git a/params_shard_123.bin b/params_shard_123.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6144422f8d812d908d31ceecceaa71cefa9be340
--- /dev/null
+++ b/params_shard_123.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0908097152c2f3097b2f8fcc76107f149d9642851d6b55c12de92d40af57d383
+size 39321600
diff --git a/params_shard_124.bin b/params_shard_124.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c35495243a0f6d22e0744eba9921b800db84b3c8
--- /dev/null
+++ b/params_shard_124.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:508cb2b1af76db10085223b2945b268fd04cd2129106e5ec91a197d932901ea4
+size 78643200
diff --git a/params_shard_125.bin b/params_shard_125.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9e0d5829514c0cd463b4d93e8ee3def88488180b
--- /dev/null
+++ b/params_shard_125.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9b2a455a701fbefc7a3b1e300a7b9e63d624e5c51576449deb2064c72386eef
+size 24596480
diff --git a/params_shard_126.bin b/params_shard_126.bin
new file mode 100644
index 0000000000000000000000000000000000000000..74e964f6ba064000d2d69dc3d363ae98868e1b06
--- /dev/null
+++ b/params_shard_126.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:855528910738d79ec47952435c7dbfcaed46b129acc0aa3782d1399c86b3892f
+size 39321600
diff --git a/params_shard_127.bin b/params_shard_127.bin
new file mode 100644
index 0000000000000000000000000000000000000000..119f624bb3d36a42a9c0fd6ffa39ab3b99b6c41b
--- /dev/null
+++ b/params_shard_127.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6777960f6bd980fd3ff6b0c64015ffc054e15673ff15fbbb7ad0c6a631251217
+size 39321600
diff --git a/params_shard_128.bin b/params_shard_128.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a2bd88569164c30ad9073354af202c53f39f4fc7
--- /dev/null
+++ b/params_shard_128.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3415f67c6fe198dcfdfdd492f5ccc1cf9f1c5791217cc89451b42deb1f8bd101
+size 29532160
diff --git a/params_shard_129.bin b/params_shard_129.bin
new file mode 100644
index 0000000000000000000000000000000000000000..67430ae46554440b3e04bf2c8c9f31cc61ccca55
--- /dev/null
+++ b/params_shard_129.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c9ba7c9b16911f4644dbf87cc6ad3318ca36f7286f629ed840bd6d849e9803a
+size 78643200
diff --git a/params_shard_13.bin b/params_shard_13.bin
new file mode 100644
index 0000000000000000000000000000000000000000..452afbfbe830562f2113ca84262d932756243f88
--- /dev/null
+++ b/params_shard_13.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ece41edba2acb34e9a69e17c2d6e49f3f069a0fe86994d6fecd28489c5244c05
+size 39321600
diff --git a/params_shard_130.bin b/params_shard_130.bin
new file mode 100644
index 0000000000000000000000000000000000000000..72797167936b6676cf5f8dfebef51caed8d0200e
--- /dev/null
+++ b/params_shard_130.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a93ecd318a052cb71a54c2ec2dbd77adbc5d9f41e13952264c1d9b38b5733b8
+size 39321600
diff --git a/params_shard_131.bin b/params_shard_131.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f011f8d716b0c3d86559f8cbe022251ac4f4dc71
--- /dev/null
+++ b/params_shard_131.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b21dae64a8ea61819d39cc4f8adebb4f2839f7ca5d8125fcd04714ca8fb5611
+size 32788480
diff --git a/params_shard_132.bin b/params_shard_132.bin
new file mode 100644
index 0000000000000000000000000000000000000000..bdc8ea8d2925a9b1dd6f25aea33cfcd1ff4e8082
--- /dev/null
+++ b/params_shard_132.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:651bcd1eeab0eb996aa9998fc9afd4efbb900377afbfc155e610e6a22aaf11e7
+size 39321600
diff --git a/params_shard_133.bin b/params_shard_133.bin
new file mode 100644
index 0000000000000000000000000000000000000000..42eeaf1716ff1264d7bf252f6348a09da37b5850
--- /dev/null
+++ b/params_shard_133.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e219b013fa2d0570acdb1ca27a6613d07322e40e1d43a54619e86b97e48e3c1
+size 78643200
diff --git a/params_shard_134.bin b/params_shard_134.bin
new file mode 100644
index 0000000000000000000000000000000000000000..540494baafff84ab7b1f4edc104514998e1f494e
--- /dev/null
+++ b/params_shard_134.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b1387b0e99bcbc2e2b0a36e0343209c13f0e6f0adc5d821dcb586cbbc867400
+size 39321600
diff --git a/params_shard_135.bin b/params_shard_135.bin
new file mode 100644
index 0000000000000000000000000000000000000000..78799ec61d64d7eaf3e40c1e9ba9ba258e2131f0
--- /dev/null
+++ b/params_shard_135.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aca87af63a1fa83a29a8e309dcf21a0a5cf278a681cda14037fd6dc543c72987
+size 21340160
diff --git a/params_shard_136.bin b/params_shard_136.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7411fe48831fe79769a0696227e5b281301d1fde
--- /dev/null
+++ b/params_shard_136.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6be2b299a344df7910213c434242d135428d6503709e577bb51bf18292584611
+size 39321600
diff --git a/params_shard_137.bin b/params_shard_137.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7efadf7ac4f0b68b52bc1acf88ad5a8f280fdd4d
--- /dev/null
+++ b/params_shard_137.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:449f8ce5dd7eede805dfd62577a20fad3fc9c2ddd943e36b59321247e653bdcd
+size 78643200
diff --git a/params_shard_138.bin b/params_shard_138.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6a7feede24cc909faa852a89a0c88f0aab655146
--- /dev/null
+++ b/params_shard_138.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c8773d276267599bd788dd9404ed59e8d80f80a225526b91164af3c203d0f57
+size 39321600
diff --git a/params_shard_139.bin b/params_shard_139.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a063cd206020e7fc967a9635bf2904ef3b566301
--- /dev/null
+++ b/params_shard_139.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63840112921d657b2a30535e6f1b220d648da5c536e61860bbb96fb065d19e9b
+size 29532160
diff --git a/params_shard_14.bin b/params_shard_14.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cfe2339441c91e7aa0c7267b37fb8475c55163ce
--- /dev/null
+++ b/params_shard_14.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96ef5f6d07e83272b8de14740aa961ea9e44a97ece6ca036a1eeee4762b41ffe
+size 29532160
diff --git a/params_shard_140.bin b/params_shard_140.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f3e0bf9fe66ac790c66fc39d05065f7156514b2a
--- /dev/null
+++ b/params_shard_140.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33feb4a518550be91bbccfdbb225c08909727e27f8c4042a472886703c677e25
+size 39321600
diff --git a/params_shard_141.bin b/params_shard_141.bin
new file mode 100644
index 0000000000000000000000000000000000000000..25ade775bd3d2a4ee7313aac0ad2030d4d8392a8
--- /dev/null
+++ b/params_shard_141.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:307201ebf2e7c35ecb57d53d19f820370f3cf6e86f9c065425b1fb6629501331
+size 78643200
diff --git a/params_shard_142.bin b/params_shard_142.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9453cc0851e7e0d4c2b29963fa5f5f40e082dc63
--- /dev/null
+++ b/params_shard_142.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f5c77442280741ce17274e8596ab91c8f783ed2d984eb11755be9259a396cd6
+size 24596480
diff --git a/params_shard_143.bin b/params_shard_143.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e6b5362e52cc18e6a28bbe9f0716eaf6c0d4f7b6
--- /dev/null
+++ b/params_shard_143.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d435e254777159fd864d7b9fd9ff0932beaf9cd0452c19928ecf0a3a78ae959f
+size 39321600
diff --git a/params_shard_144.bin b/params_shard_144.bin
new file mode 100644
index 0000000000000000000000000000000000000000..aa6d44d8766a7cff4d9165bf09e7c3b3916889cf
--- /dev/null
+++ b/params_shard_144.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4c62cfc91613c661131378d93e7a41b51dfa441a3380052574c4019a1b6892f
+size 39321600
diff --git a/params_shard_145.bin b/params_shard_145.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6730ca07df350e6c46914dcbf36f1d13a03423d7
--- /dev/null
+++ b/params_shard_145.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3553a9238e55623ab510e799cdd8d0294b4216753c7855c7e9d16887875ad2c
+size 29532160
diff --git a/params_shard_146.bin b/params_shard_146.bin
new file mode 100644
index 0000000000000000000000000000000000000000..419bc1c8ac5bea93c66e7b671959a4ca8c432c93
--- /dev/null
+++ b/params_shard_146.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d0e8d3fca3cda7a17438b85150fc12f5e39452ab4ebd4cea4b37682ae4675c2
+size 78643200
diff --git a/params_shard_147.bin b/params_shard_147.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c1f6b0b9e223b2be052bc3712ed75bfef0aca34b
--- /dev/null
+++ b/params_shard_147.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:254f001502517f32d18cc3c7cb798aa65ff88dc8466e01bd4c62e87fb9bf8df2
+size 39321600
diff --git a/params_shard_148.bin b/params_shard_148.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0dc5ddb55c750546609b32d4b71e96e7799a609a
--- /dev/null
+++ b/params_shard_148.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d3c0ff19b95d2074b5794d9e8edd4dc4b5e26fde5f4d81e2355e53d05af4911
+size 32788480
diff --git a/params_shard_149.bin b/params_shard_149.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6d35a1bda00ba55c96b51a47f784e9e29d3415ff
--- /dev/null
+++ b/params_shard_149.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06552330f61203a6d5b3a9ec30821638c3391731cf6d9efa162b5f980866a480
+size 39321600
diff --git a/params_shard_15.bin b/params_shard_15.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0b9c0ba4f7abc257559802f6e94f3c43cb9b1d0c
--- /dev/null
+++ b/params_shard_15.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec540b0a143f9576f44d292d6573aaf592634069780f37f9a54500c805e3d637
+size 39321600
diff --git a/params_shard_150.bin b/params_shard_150.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f6565de34c91225d0d277deac9c36955d86dac5f
--- /dev/null
+++ b/params_shard_150.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01584fa5e8d1bb2fb78304149651cca9d2a9cc3c9bdddcc130626b8e2c7abcb4
+size 78643200
diff --git a/params_shard_151.bin b/params_shard_151.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f38916580683669b1c93de637330af595739bf91
--- /dev/null
+++ b/params_shard_151.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad81c03f4db5c3aea2eb637f75dbf79934f218d5e8398cbee150d0bbdf501941
+size 39321600
diff --git a/params_shard_152.bin b/params_shard_152.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2c35cb027550fab745761a185053f86f1ecc5146
--- /dev/null
+++ b/params_shard_152.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f1fbe746ece796a1e4e5b311ddc027da3ed9dee7fa5bba23d1bba6c45f36446
+size 21340160
diff --git a/params_shard_153.bin b/params_shard_153.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9e6f92be6321e9a67ff3c765b51373e6eef34f43
--- /dev/null
+++ b/params_shard_153.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83857f59f39e61971536eedc81b7783413269fb3d0742ae22f99e0826a90e069
+size 39321600
diff --git a/params_shard_154.bin b/params_shard_154.bin
new file mode 100644
index 0000000000000000000000000000000000000000..20e5c8c3b8e07512a9f56195e966883ba34173e8
--- /dev/null
+++ b/params_shard_154.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d54d8441cb911f998ec515cae7b943b83ea2b2eb6dce716e746c84062a4bac3
+size 78643200
diff --git a/params_shard_155.bin b/params_shard_155.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c37173e456d9061d4a655667c669f6aaaaa2317f
--- /dev/null
+++ b/params_shard_155.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ebb909fb8fc229e959296490054089bf10271d3d9f0436c6235532be5cf0fb5
+size 39321600
diff --git a/params_shard_156.bin b/params_shard_156.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e8af83e1e040f8cb85570817444ad2ef810e8d0f
--- /dev/null
+++ b/params_shard_156.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71b6f4a53a929dc1c0b24222650e1bbec35dbe2e9edc821e516392a838044d4e
+size 29532160
diff --git a/params_shard_157.bin b/params_shard_157.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b5c30a465364a5001d1b07e40aff593eaf346281
--- /dev/null
+++ b/params_shard_157.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7dd854fa2dbe32acb93e8e110ea0cb2acd21cb30a3313ebf7f2a2eceadd8994
+size 39321600
diff --git a/params_shard_158.bin b/params_shard_158.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0d87ff367c1f14117f3fde48be343653d8798437
--- /dev/null
+++ b/params_shard_158.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:73610e395d4313a167dcd0350a2344c7da46254bd1de94d98d19d5a83e70fb2d
+size 78643200
diff --git a/params_shard_159.bin b/params_shard_159.bin
new file mode 100644
index 0000000000000000000000000000000000000000..747eaab86e0c8500b5b862a78163d1869215056f
--- /dev/null
+++ b/params_shard_159.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74e9b6e222b00efdab3aeb015abd5b4697b23e7e3564b144a81504ce02ea744c
+size 24596480
diff --git a/params_shard_16.bin b/params_shard_16.bin
new file mode 100644
index 0000000000000000000000000000000000000000..663d511ecef51da4b475a85a487abc881f94cb18
--- /dev/null
+++ b/params_shard_16.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a0d1637294f58d2ad75b4a0515a350e8b17de3e8a56abca58b560c7ba626d8de
+size 78643200
diff --git a/params_shard_160.bin b/params_shard_160.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9717599f0bd86b8dbfacbedf3e6d7877bb3709bc
--- /dev/null
+++ b/params_shard_160.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:943a8e58ef8892154a016f690c7ae90b2418a0adf9fe59a44f5eb1a6400ab266
+size 39321600
diff --git a/params_shard_161.bin b/params_shard_161.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d91c57ab2eb59d10f8df2a2aace5f89e969f2418
--- /dev/null
+++ b/params_shard_161.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ab20d67ca48204fe979a05dca4e292cdd4f2713a7a3b8f351aa3a5f51e322e0
+size 39321600
diff --git a/params_shard_162.bin b/params_shard_162.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8f771fda47e07f985546018cd9787ada6a9095c5
--- /dev/null
+++ b/params_shard_162.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43a6d7588b4a974d3b1aef643b7e461546db257a8dbab776961a62e526d78ae4
+size 29532160
diff --git a/params_shard_163.bin b/params_shard_163.bin
new file mode 100644
index 0000000000000000000000000000000000000000..81981e237b37364d84bb2a1989b2b503a0989085
--- /dev/null
+++ b/params_shard_163.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f9e0c0e1bd83cd1ca80bbcd30ae8e9d4aac330a493f4550ffa7fbd4c17c14679
+size 78643200
diff --git a/params_shard_164.bin b/params_shard_164.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8630c94d2d822a10346dd05322bcf3f07055da36
--- /dev/null
+++ b/params_shard_164.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01127630d47ac65787e5c8793d3822d122176b24a3bb28a24723b4a9acc66257
+size 39321600
diff --git a/params_shard_165.bin b/params_shard_165.bin
new file mode 100644
index 0000000000000000000000000000000000000000..45bb6c47a7a986aada4fda91080c5dfe371eb2d3
--- /dev/null
+++ b/params_shard_165.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfbf99fdd30b88b73b4812bd578b301a8d29529e1f26d32809e5071489aa0079
+size 32788480
diff --git a/params_shard_166.bin b/params_shard_166.bin
new file mode 100644
index 0000000000000000000000000000000000000000..74b9a008c97877b2ad341c3e24dce4272cc98af7
--- /dev/null
+++ b/params_shard_166.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e3a4568f840531f780f2c1ba797698c073f44e5c103f8e2c0c114ce8880c4d1
+size 39321600
diff --git a/params_shard_167.bin b/params_shard_167.bin
new file mode 100644
index 0000000000000000000000000000000000000000..824a2ea9c76a9835e9ecc24fa4096f439f94d320
--- /dev/null
+++ b/params_shard_167.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:747250ac0bab1e3a8bfdce4fee75814ea2c82719dbd1e358d7e9d7db4a853590
+size 78643200
diff --git a/params_shard_168.bin b/params_shard_168.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d37ba5974e79def6622dbe57a655fdfb4cfb06df
--- /dev/null
+++ b/params_shard_168.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6120085111e8e0a549f9d44dc31d183fb3b608816eafa41ce4663022739bbb1
+size 39321600
diff --git a/params_shard_169.bin b/params_shard_169.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e34c6161c250a5e992450eced071b7ddf3f8e210
--- /dev/null
+++ b/params_shard_169.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea7fb17fb28795192f9d16fa38f422993b808eda943eebc0b56a53a9b9a2f32b
+size 21340160
diff --git a/params_shard_17.bin b/params_shard_17.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d0e2c47a3676455b13cbfc39fc5f2cfa0a7c0293
--- /dev/null
+++ b/params_shard_17.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf67cc84f2d778e2f7269ff77f917871d2474bd6b63611009b36529825914416
+size 24596480
diff --git a/params_shard_170.bin b/params_shard_170.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a9f650fe23ece7ddbf97c6d6fc27e29b73e2cd8a
--- /dev/null
+++ b/params_shard_170.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d047d7ba99f0252b36badc502365383ec8c82b5d29dec0b11c304b6253b1a11
+size 78643200
diff --git a/params_shard_171.bin b/params_shard_171.bin
new file mode 100644
index 0000000000000000000000000000000000000000..87c0740a89d9a9cdfb70dc928ff4890a6926c3d6
--- /dev/null
+++ b/params_shard_171.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2cfaf174da1d8dbb4e4b87549d0844e835114e0a16e689b79c3a590b6d1a046
+size 39321600
diff --git a/params_shard_172.bin b/params_shard_172.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9b70af07720958d9ebf780142c445e6d2f0093aa
--- /dev/null
+++ b/params_shard_172.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e4f6d008963a3f4a2ec58dd1e8f0ae79a4620bf4dd65988da20c2e7f204df375
+size 29491200
diff --git a/params_shard_173.bin b/params_shard_173.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f91ceea94f2aa6020b0c09237a713cd97a5079fd
--- /dev/null
+++ b/params_shard_173.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:252d011cca340bfb53d2f7f1f924e8c18b026e8213d28001208d3fce67072d71
+size 14745600
diff --git a/params_shard_18.bin b/params_shard_18.bin
new file mode 100644
index 0000000000000000000000000000000000000000..27a5c61c8e3ba6748361be01908161486732e983
--- /dev/null
+++ b/params_shard_18.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3572356ff7100befe4be187f0cae28d5962eb05bc4802fd64429be483fbf7668
+size 39321600
diff --git a/params_shard_19.bin b/params_shard_19.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a153d1864faa8d8cf8a2ef688e6fe00eed3d519d
--- /dev/null
+++ b/params_shard_19.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c34999f269711513a183bcd3148062d21abad8683da3e3a555880806c9d1a67c
+size 39321600
diff --git a/params_shard_2.bin b/params_shard_2.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8a041298d0f04baa43244f8b3926c9af64e07824
--- /dev/null
+++ b/params_shard_2.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0fce64e6254caef24e6ce77a8ce72238cb4de0c028805d3c5cf2e865f7a1532c
+size 39321600
diff --git a/params_shard_20.bin b/params_shard_20.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5b2089ccbf0ad38506d99ae68e9905220f3cdb48
--- /dev/null
+++ b/params_shard_20.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6ab325cc8510f7bf93f896a9ee1fd9e00348b82bb5eb9fad91736136bc92d476
+size 29532160
diff --git a/params_shard_21.bin b/params_shard_21.bin
new file mode 100644
index 0000000000000000000000000000000000000000..01878129f47c24e0ebda837c124717737db5e0a1
--- /dev/null
+++ b/params_shard_21.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:051d3549605a7884be517e3d2d3c3f3685a648056359a7841f807429fb52ea28
+size 78643200
diff --git a/params_shard_22.bin b/params_shard_22.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1fc3ad6df3df1b11dd4abd7544ad27302c7ec479
--- /dev/null
+++ b/params_shard_22.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a0daf2d2d304510a0c216ac693445d9ddaba983e89798367346e58ef9a3523b
+size 39321600
diff --git a/params_shard_23.bin b/params_shard_23.bin
new file mode 100644
index 0000000000000000000000000000000000000000..100724573c5b9f6cd917f30654a5137ab1b205dc
--- /dev/null
+++ b/params_shard_23.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17544bd9505453d1ffd3c08608c733f3c4cb246c01f118e38b493511e001cfff
+size 32788480
diff --git a/params_shard_24.bin b/params_shard_24.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4aab48018ee7da87d7214b2343636b8d59d0bc30
--- /dev/null
+++ b/params_shard_24.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15f8e686299c79446708834d16acb28ebd9eb43de76fe91081eb246a70d701bd
+size 39321600
diff --git a/params_shard_25.bin b/params_shard_25.bin
new file mode 100644
index 0000000000000000000000000000000000000000..56734b54964b64b877f317d47252f77c065efdaa
--- /dev/null
+++ b/params_shard_25.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:211776ea9234b88eab924d793753b03e0582c66b3292da23a60580413ea6be2d
+size 78643200
diff --git a/params_shard_26.bin b/params_shard_26.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ffaa84d3e33d83b0ca8d8c372bc3835424f1b391
--- /dev/null
+++ b/params_shard_26.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbf203ed07507e946c50ecffd430ff680951c0fe1833eab1c3b349126c3d3dbd
+size 39321600
diff --git a/params_shard_27.bin b/params_shard_27.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0bc8858fc760f25a8ec06c2e6887de59f46c10cc
--- /dev/null
+++ b/params_shard_27.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f2d2ef2d1b6a03633006629410394cbb4e3a8b0eba2c3f8722c8be8df328de3
+size 21340160
diff --git a/params_shard_28.bin b/params_shard_28.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f5ce908f761f46e7431e82a873c35e7168ca7178
--- /dev/null
+++ b/params_shard_28.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9154d5decd521adf8d19dc401e1301698566d69e418f5193136e0e801178c223
+size 39321600
diff --git a/params_shard_29.bin b/params_shard_29.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8210746051c53fac8553aa8acc6caf717ee24f7b
--- /dev/null
+++ b/params_shard_29.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:172ed945920b71314a108aaed184c0669152de1ca76de17b95cd67e86413d387
+size 78643200
diff --git a/params_shard_3.bin b/params_shard_3.bin
new file mode 100644
index 0000000000000000000000000000000000000000..210d0781fe2bac1b5dde1097a774251e298a2a45
--- /dev/null
+++ b/params_shard_3.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a5e3c387fb2575bd14fa87dd74358c80858a3447f902ae01b6af55a62cb2a7e
+size 32051200
diff --git a/params_shard_30.bin b/params_shard_30.bin
new file mode 100644
index 0000000000000000000000000000000000000000..21a4e80161dc44950e4893928214efaf5951c091
--- /dev/null
+++ b/params_shard_30.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1950fbac6211b6fed266cf94adce15a8c513c81965b4e2c09830a5cfb0d4eb93
+size 39321600
diff --git a/params_shard_31.bin b/params_shard_31.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1924e4e3efcb230678c6a48cc528e3930c0d751b
--- /dev/null
+++ b/params_shard_31.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ea5045028ba5bc87effecbb1e5975f3ce9e06cdd0156a1de5b61c89fc7ca7166
+size 29532160
diff --git a/params_shard_32.bin b/params_shard_32.bin
new file mode 100644
index 0000000000000000000000000000000000000000..e7ee43a8daccdc9efd6a38ec749ffec3a5716f57
--- /dev/null
+++ b/params_shard_32.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89bc3b7cd38ebfb02abf166b72c4797961d90556ee3afe578cf9741981d92527
+size 39321600
diff --git a/params_shard_33.bin b/params_shard_33.bin
new file mode 100644
index 0000000000000000000000000000000000000000..58ea8edf3137bc411bb3ecda767e8c84a134f860
--- /dev/null
+++ b/params_shard_33.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10a45b2968df3ea2be076354be0b742da603ffba7e34340e8abebfcbe8f3f899
+size 78643200
diff --git a/params_shard_34.bin b/params_shard_34.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0489f374c9356dea195b94c6bf04d1143ff06f47
--- /dev/null
+++ b/params_shard_34.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ec8f394284886101f6b962c03154e56c6f499cb1d6373eea98cec8309c0b805
+size 24596480
diff --git a/params_shard_35.bin b/params_shard_35.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b01a5043c4a21d88615f8a901bbf086256e797b5
--- /dev/null
+++ b/params_shard_35.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3b1545de2b90531e0976ba161056fe6faa8963767f9ff132eaaadf4c6ce1843
+size 39321600
diff --git a/params_shard_36.bin b/params_shard_36.bin
new file mode 100644
index 0000000000000000000000000000000000000000..503b3d6a99e8838166da68527488ccf2928d4c9b
--- /dev/null
+++ b/params_shard_36.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b58f64e05704b1771f1c3ad7c77e7217bb23a4e654d570543bfed9587d5ffa6
+size 39321600
diff --git a/params_shard_37.bin b/params_shard_37.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9fee79168e21fd7f8a7fa7889b3675b733d2ee2e
--- /dev/null
+++ b/params_shard_37.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c728074abc3bc3e1fb1f46b0c0908c5fc50eb3539c868415ad255b8626491cec
+size 29532160
diff --git a/params_shard_38.bin b/params_shard_38.bin
new file mode 100644
index 0000000000000000000000000000000000000000..70ac461d0ba44304c2fd1a5228fa168d57823ea2
--- /dev/null
+++ b/params_shard_38.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a350040336294e4e7d24ee250bae0d12c50ee2456ea782f288aa1181a0bb37ad
+size 78643200
diff --git a/params_shard_39.bin b/params_shard_39.bin
new file mode 100644
index 0000000000000000000000000000000000000000..171259e27135f887fd358a76ca042b8a647d6268
--- /dev/null
+++ b/params_shard_39.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ead3f3e9cf1c77a5851deb49b27af79051367424fd6ec53ce10910724883a5a4
+size 39321600
diff --git a/params_shard_4.bin b/params_shard_4.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5968756e3af569f120b187525a0720f44b857e90
--- /dev/null
+++ b/params_shard_4.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:19e4b387dab0b501005db795283e09210f6a6599c96b183a008caf147907000d
+size 78643200
diff --git a/params_shard_40.bin b/params_shard_40.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1196f57a5f85a3af7319a1c4df9f007524e16251
--- /dev/null
+++ b/params_shard_40.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee66632b33878fe8e629e523617e66620ff3b92edd744c867f560ea389f7c632
+size 32788480
diff --git a/params_shard_41.bin b/params_shard_41.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b34f4847448886e4edea8690b184d959b621806f
--- /dev/null
+++ b/params_shard_41.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd4e8d2af95d7543ef0c607aaa0ecea5b7bef308c152d1867bd7fa78a75a9d5a
+size 39321600
diff --git a/params_shard_42.bin b/params_shard_42.bin
new file mode 100644
index 0000000000000000000000000000000000000000..26ad52cf8e6beba2493bcc5135f96b35ede8df3a
--- /dev/null
+++ b/params_shard_42.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e81c47557e38206c1605499b9c003b639920cf4f7c033f04e648629441a60c6d
+size 78643200
diff --git a/params_shard_43.bin b/params_shard_43.bin
new file mode 100644
index 0000000000000000000000000000000000000000..66af1ac44d18eb1f0646e3f32127cbffd89839ff
--- /dev/null
+++ b/params_shard_43.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:481613cdee8fc5e490dc69a7dca6b2d42866556e21a92899243f7ae2603ace69
+size 39321600
diff --git a/params_shard_44.bin b/params_shard_44.bin
new file mode 100644
index 0000000000000000000000000000000000000000..57af2d89f371933f27100ac1d01d560c0741f858
--- /dev/null
+++ b/params_shard_44.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7472cccb6e4033284a36d5e9bb6798c7da82622ccf69eccf9a67ad5d9e960bf
+size 21340160
diff --git a/params_shard_45.bin b/params_shard_45.bin
new file mode 100644
index 0000000000000000000000000000000000000000..17f0c01873275aa170a523ef2116a1bfd5d6179d
--- /dev/null
+++ b/params_shard_45.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:892f46713fcd59e04bcba8cdb89e6f0a4f8fb0ca9665c4ec6fa016365b26442c
+size 39321600
diff --git a/params_shard_46.bin b/params_shard_46.bin
new file mode 100644
index 0000000000000000000000000000000000000000..fdb9efaa12c8b33ba7adf54503e6a38588bb86dd
--- /dev/null
+++ b/params_shard_46.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba24db90824ca727fe8a9aff9d85393b8c640cf93a3624f352b61ef8c95b05aa
+size 78643200
diff --git a/params_shard_47.bin b/params_shard_47.bin
new file mode 100644
index 0000000000000000000000000000000000000000..c7d1bb938f73caa83acdde170e6bc80eb9bbff41
--- /dev/null
+++ b/params_shard_47.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10022d164be19a8205feb29efe840369332eb4ccf2a975c9d70e9010a4dd814f
+size 39321600
diff --git a/params_shard_48.bin b/params_shard_48.bin
new file mode 100644
index 0000000000000000000000000000000000000000..26b87843d7e76f620fc20e997e668bfba11703f9
--- /dev/null
+++ b/params_shard_48.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05caa0bad7dc6d0f4f8938b2ede6e0759805b49aeb364949d2768dbbe5b98a9a
+size 29532160
diff --git a/params_shard_49.bin b/params_shard_49.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d59e37fe377bfbb56affda7c20cbc6acdd7f8940
--- /dev/null
+++ b/params_shard_49.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ade85d267740af565055156a6fb8910d3c81f7e8a0e075ed2c0494b8301f117f
+size 39321600
diff --git a/params_shard_5.bin b/params_shard_5.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ad4a7f411a4158707867a467c3c4a2a1f7fdb3dd
--- /dev/null
+++ b/params_shard_5.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b9383c0d481fb4cd37bfc9b3b596b15acd5a2ea89bc7518f084c70db99e42bf
+size 39321600
diff --git a/params_shard_50.bin b/params_shard_50.bin
new file mode 100644
index 0000000000000000000000000000000000000000..89f65d66a57d0bab5499f6f41ad520cb8ff9ce0b
--- /dev/null
+++ b/params_shard_50.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3809a3dbc4407487ae1dbb6a8ccaa72ce3979f2e91c14f8967950b604c067f78
+size 78643200
diff --git a/params_shard_51.bin b/params_shard_51.bin
new file mode 100644
index 0000000000000000000000000000000000000000..4817fed8fd47866944ab5eb6cc076940c211f72f
--- /dev/null
+++ b/params_shard_51.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c26e5b6d22b6c79b5b7040c987bac632f56d05424afdc0875d54480d7beac43b
+size 24596480
diff --git a/params_shard_52.bin b/params_shard_52.bin
new file mode 100644
index 0000000000000000000000000000000000000000..6620a674d46c8a88b2e4804d9a5a93b972eb6ef7
--- /dev/null
+++ b/params_shard_52.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bcecf2ef1cc502d755091e6461e9570783c5b5905a2952220713e365ed905af8
+size 39321600
diff --git a/params_shard_53.bin b/params_shard_53.bin
new file mode 100644
index 0000000000000000000000000000000000000000..66639b0ced3cb94bd3c1e5fad88b88cce785f91b
--- /dev/null
+++ b/params_shard_53.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66e3be33cee1e4f1da4da01b85bee317f4e162e97d8ccb509ce10041d900b59d
+size 216596480
diff --git a/params_shard_54.bin b/params_shard_54.bin
new file mode 100644
index 0000000000000000000000000000000000000000..05b7922907ecc5a1ef5943d116f590e063c998c9
--- /dev/null
+++ b/params_shard_54.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1df32899db383956aeeaed615896cb4203eee4814f6d5e44dd7b0d73e2ff08ef
+size 27074560
diff --git a/params_shard_55.bin b/params_shard_55.bin
new file mode 100644
index 0000000000000000000000000000000000000000..9ba2f0c79e85cc5d1d1758d2c5dc6be96afc3b5e
--- /dev/null
+++ b/params_shard_55.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:865a0588986f77b1b76e8a95baaf2beedab3bdd032db14acae90c92401b078aa
+size 39321600
diff --git a/params_shard_56.bin b/params_shard_56.bin
new file mode 100644
index 0000000000000000000000000000000000000000..cb8347d690f5a61f9fd4d34c7ace8bd2765b230a
--- /dev/null
+++ b/params_shard_56.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c11314318c284ce21321d2d0dffa14d22e35a1ebbb5f88df0423e9c2f3756ae
+size 29552640
diff --git a/params_shard_57.bin b/params_shard_57.bin
new file mode 100644
index 0000000000000000000000000000000000000000..682e82ebf17adda18a730a2e6d8a39b7ad173a0a
--- /dev/null
+++ b/params_shard_57.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27fb57cf4862d6065487f3fc0efb6968b8776e4620e909b85ed99aaefe92fa16
+size 78643200
diff --git a/params_shard_58.bin b/params_shard_58.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f16cc164764ce99e666a18776c71161101b25848
--- /dev/null
+++ b/params_shard_58.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f3c1b08c53521dff1429fb4f827d5ceb6121fc456990bc030a9f4ae850ac895
+size 39321600
diff --git a/params_shard_59.bin b/params_shard_59.bin
new file mode 100644
index 0000000000000000000000000000000000000000..eca1effcea018760a5acf56db5b0f37c1dee395e
--- /dev/null
+++ b/params_shard_59.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a5073e57ef5c2738e57271983543cc844bba0b9486b4e04c5245878ecb70e3b
+size 32788480
diff --git a/params_shard_6.bin b/params_shard_6.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3920914953126b7afdbdef84cb5615a09bc1227f
--- /dev/null
+++ b/params_shard_6.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00a9a4a3cef0c5fbe119ef23596975c3237ef4ce92fc91551232f2ea8aa89827
+size 32788480
diff --git a/params_shard_60.bin b/params_shard_60.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3a5b15644030ab19f18c7a59497bc094432633c5
--- /dev/null
+++ b/params_shard_60.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e1d37011903c8a1fe71f31739dc9a654655702e0c1b89aa0ceef02e6c635a92
+size 39321600
diff --git a/params_shard_61.bin b/params_shard_61.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3a1ed3beffa4fdf600f1c023f919a3d735404f52
--- /dev/null
+++ b/params_shard_61.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50fa1715e2024ffc8f589e583a847c2c5cceefd6f0f32b8355c4eed45985999a
+size 78643200
diff --git a/params_shard_62.bin b/params_shard_62.bin
new file mode 100644
index 0000000000000000000000000000000000000000..934c494cc599f57d814a3e0a611b7f2553eb46d0
--- /dev/null
+++ b/params_shard_62.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4a90f016486c3f27dc6e4483982b2779da36bb2f28b558cd21746063550158c
+size 39321600
diff --git a/params_shard_63.bin b/params_shard_63.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1c0715d636d9a161a59373e6f9dd9a18913ab277
--- /dev/null
+++ b/params_shard_63.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecc92518b3983ee2a9d27aedd9dbe16dce344fe1e11363cb9fa0dfdb1fd11ce3
+size 21340160
diff --git a/params_shard_64.bin b/params_shard_64.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0a21a85357cbe5def64851d146368af027072bbd
--- /dev/null
+++ b/params_shard_64.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:944ed7dfc8328a95db936b0806c46a2c76bdb78cab707c86f439f0172bfc8152
+size 39321600
diff --git a/params_shard_65.bin b/params_shard_65.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8653e98e504f8d0eff21cc1f2eb8a9ce879fe701
--- /dev/null
+++ b/params_shard_65.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9beae2eb81a97de576e4d16cebdf927e201abb84770d226d990ada688739ffa
+size 78643200
diff --git a/params_shard_66.bin b/params_shard_66.bin
new file mode 100644
index 0000000000000000000000000000000000000000..7a3676a71359a61eaee02f37ced8e527d1cb15f5
--- /dev/null
+++ b/params_shard_66.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ab1a8690a722343e0145a6219431098bff0e8f70318a4fd5530451d38cf7895
+size 39321600
diff --git a/params_shard_67.bin b/params_shard_67.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3c417105b7815255c2d968f9066000914273521f
--- /dev/null
+++ b/params_shard_67.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f947aa4a5f61551833dbe9be5b2a38e55e5f94b3ca87fe7be7e785aa947be1b2
+size 29532160
diff --git a/params_shard_68.bin b/params_shard_68.bin
new file mode 100644
index 0000000000000000000000000000000000000000..46fe883b356a6a456653385f81ff3a947e46497d
--- /dev/null
+++ b/params_shard_68.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bef1cc1162fb3d1f0d60e5e83c82cb238ef66825ac52f022a6f725145405481d
+size 39321600
diff --git a/params_shard_69.bin b/params_shard_69.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b74a15b371e3a338e4b0f54fa16547e17a743b1c
--- /dev/null
+++ b/params_shard_69.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62a5cbf413f79617f41ad226a21df3d3fadacc4da829293bdb9ea40e8c194a87
+size 78643200
diff --git a/params_shard_7.bin b/params_shard_7.bin
new file mode 100644
index 0000000000000000000000000000000000000000..95f50a02de4e12229852d02c7ecadd3d27bbcd1e
--- /dev/null
+++ b/params_shard_7.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6e07e5e61dc289e298ff5378d89c3ac44d5b9a4c74373aae4aeef94618ca4de
+size 39321600
diff --git a/params_shard_70.bin b/params_shard_70.bin
new file mode 100644
index 0000000000000000000000000000000000000000..5fce70cf1a92f191f438688a0d53313c80b553ef
--- /dev/null
+++ b/params_shard_70.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a87b17a058c4d252688bfa135d43db9776f4f35f336c662a530c80bab5caa5b3
+size 24596480
diff --git a/params_shard_71.bin b/params_shard_71.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f17ea0f45b7dfb7dea1b2198999070ec37d4b671
--- /dev/null
+++ b/params_shard_71.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb1497440c21059ee02f4bd02ae28cfbd7892fd562ec9813bcc8ffc732fafcd8
+size 39321600
diff --git a/params_shard_72.bin b/params_shard_72.bin
new file mode 100644
index 0000000000000000000000000000000000000000..1df6001b3c5c9ee215a6114e45acb4d90f06602f
--- /dev/null
+++ b/params_shard_72.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e85f2c078695312bcf52a96e169e47792b8907a365ee589ddedf3758fa45dc97
+size 39321600
diff --git a/params_shard_73.bin b/params_shard_73.bin
new file mode 100644
index 0000000000000000000000000000000000000000..76757066d9ca6175d4cc0e8d2af2a9ef4b7e5928
--- /dev/null
+++ b/params_shard_73.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fbf2eadf2c8a99702a6fb3d8883bb9f97892fea3c287ce77eb7a83005630b492
+size 29532160
diff --git a/params_shard_74.bin b/params_shard_74.bin
new file mode 100644
index 0000000000000000000000000000000000000000..08b3bed100ce17816762db8090cfa7b1b9a16ad3
--- /dev/null
+++ b/params_shard_74.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a6e5d109ff87283cf4e63fda30cb12031f16eb33e47c26ab94edfde8c2cc020
+size 78643200
diff --git a/params_shard_75.bin b/params_shard_75.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a9c0c5a01c27a90b477117722d1d16f24b3b0886
--- /dev/null
+++ b/params_shard_75.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f04825f39e4ca8db123715ef21411fecc1d8151be3ff4103ed6869eeb125142
+size 39321600
diff --git a/params_shard_76.bin b/params_shard_76.bin
new file mode 100644
index 0000000000000000000000000000000000000000..775818b81b36a6e15a9611dc1d691b6e80a496df
--- /dev/null
+++ b/params_shard_76.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c0eceb93077e66a8fd7fd7794c8459694ee4926502b7881add9f37dcee40925
+size 32788480
diff --git a/params_shard_77.bin b/params_shard_77.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a709d723e38c375c2547b55a72c8a17ec983f693
--- /dev/null
+++ b/params_shard_77.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c7477802e1df95bb2ff9f3276054b59138e03cdc87c650d251a435f87b29769
+size 39321600
diff --git a/params_shard_78.bin b/params_shard_78.bin
new file mode 100644
index 0000000000000000000000000000000000000000..260bced904c1f956ec3f64629f61609c6b97749d
--- /dev/null
+++ b/params_shard_78.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a62f55333992b5adfaaec0344bf811b51ab0f92440ebb517605c9ca2559f84b
+size 39321600
diff --git a/params_shard_79.bin b/params_shard_79.bin
new file mode 100644
index 0000000000000000000000000000000000000000..ec79dff085e0e33cc271eb99ceecfe67dde1f570
--- /dev/null
+++ b/params_shard_79.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7aca5db390ee4c7c8703f6e53a947176a0e1e4a87b979c3187ea1f0a93327857
+size 78643200
diff --git a/params_shard_8.bin b/params_shard_8.bin
new file mode 100644
index 0000000000000000000000000000000000000000..0879c99041ed30497c7d45bb22cd032cb8d942e8
--- /dev/null
+++ b/params_shard_8.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24317bb786947a2524c53d7369fbd1944d45fa5c8e3bfc1ba8d8d28b48956c16
+size 78643200
diff --git a/params_shard_80.bin b/params_shard_80.bin
new file mode 100644
index 0000000000000000000000000000000000000000..968a1056e8f1c8de3bdb5e594e95c02731b47fc6
--- /dev/null
+++ b/params_shard_80.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43cb392dbd83f142128531638cb112eb6ced62d95bc0f666d1cdf56cf5ed8602
+size 26234880
diff --git a/params_shard_81.bin b/params_shard_81.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2f4b5f8582c01c74c5fc60628372194e8349e39a
--- /dev/null
+++ b/params_shard_81.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2227e1407c0d212a7f88d1b2290d1e878b41004f1096901a43c096a1f0d4b36d
+size 39321600
diff --git a/params_shard_82.bin b/params_shard_82.bin
new file mode 100644
index 0000000000000000000000000000000000000000..51ce965be370fafc793bf4e70846a891871aadb8
--- /dev/null
+++ b/params_shard_82.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d835dc32c676a29c6defe2bd56a350f915a5419f60ff085b4cb74c251907445
+size 39321600
diff --git a/params_shard_83.bin b/params_shard_83.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f9cf3d978416f9b016b652f6bbe756738428cd1e
--- /dev/null
+++ b/params_shard_83.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64191b1fe0024ec18ac017e9eaac5af6e2564d2a7e9de380abb579f6039e32ee
+size 29532160
diff --git a/params_shard_84.bin b/params_shard_84.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f48fbe719878192a2b61c43d5d84da56c3f3f544
--- /dev/null
+++ b/params_shard_84.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:11da3cc3c677d387e95f9c917cfb65686f793d2ec698b800012de7c6473e2d4c
+size 78643200
diff --git a/params_shard_85.bin b/params_shard_85.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2c2552555b6ec5f6eb02bbf6fe3792f2dd4c1a18
--- /dev/null
+++ b/params_shard_85.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:09850def961870aa926322c9b635078c46c9893cff1dba85ed3026bb4794a214
+size 39321600
diff --git a/params_shard_86.bin b/params_shard_86.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8e200fe8882aad79f0ddceeccba2a96edc636c01
--- /dev/null
+++ b/params_shard_86.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9fa389cf98dc8f64f0836bc1836f4bd4f9b86c6e1151f142d7b673dfb7c8bc17
+size 32788480
diff --git a/params_shard_87.bin b/params_shard_87.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f2af9c3aeb66e84040936c04edd9810c9ece7137
--- /dev/null
+++ b/params_shard_87.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4b52e6050549dfe141fcb30c94f7e3735410213bc7d696ec84777ea6d84bad15
+size 39321600
diff --git a/params_shard_88.bin b/params_shard_88.bin
new file mode 100644
index 0000000000000000000000000000000000000000..17aa365c06563e223704f2db22392a52b4cdf541
--- /dev/null
+++ b/params_shard_88.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e28aa5ac469d188a6f047b90112f56efa563e2d7a29c107d95428eb48b235d9e
+size 78643200
diff --git a/params_shard_89.bin b/params_shard_89.bin
new file mode 100644
index 0000000000000000000000000000000000000000..95196ae52b241cd8544ed83d5ae7b40e14b51ad6
--- /dev/null
+++ b/params_shard_89.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b6c09b68bbf3f47e29240075a997dff36ea626357cf374f1e36cf1926ebcdf46
+size 39321600
diff --git a/params_shard_9.bin b/params_shard_9.bin
new file mode 100644
index 0000000000000000000000000000000000000000..d4492712c4ee32f7b6eb6752df9c7715798d39ae
--- /dev/null
+++ b/params_shard_9.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:53a808095452c528b490b4ce35bed9ca14814f8a2180cc5059c1cb6087fd4437
+size 39321600
diff --git a/params_shard_90.bin b/params_shard_90.bin
new file mode 100644
index 0000000000000000000000000000000000000000..145d016d5660d93ecb7c72f31116690c3aa11dab
--- /dev/null
+++ b/params_shard_90.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ad712f10dd966d182473a070b5d4571dac2e5c5e6ee5ee3362e7a331be61ae6
+size 21340160
diff --git a/params_shard_91.bin b/params_shard_91.bin
new file mode 100644
index 0000000000000000000000000000000000000000..351d1bd89572dff465a0556d12a20bf5ec8023dd
--- /dev/null
+++ b/params_shard_91.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b1cd827c3d1233765d2d63df4d59c03f7c07fe3b5c428ecfab6f5fbf154faf1
+size 39321600
diff --git a/params_shard_92.bin b/params_shard_92.bin
new file mode 100644
index 0000000000000000000000000000000000000000..f0dc67ce629ffbefb3f01e90de4cf4e0727496db
--- /dev/null
+++ b/params_shard_92.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd0f40ccef9f8bca662dab943482d772c84880a399276463b282c587dd2910ed
+size 78643200
diff --git a/params_shard_93.bin b/params_shard_93.bin
new file mode 100644
index 0000000000000000000000000000000000000000..b3109eb81225c543b1094c5e72b5fb76e2d76981
--- /dev/null
+++ b/params_shard_93.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe68641ff9d102f639c01e32b5e896db920895a34af9620c5197d6f6868fc7f2
+size 39321600
diff --git a/params_shard_94.bin b/params_shard_94.bin
new file mode 100644
index 0000000000000000000000000000000000000000..88007ff22401cc5f72444f1be173c9d60c123929
--- /dev/null
+++ b/params_shard_94.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1adcaba4707e7bd26d0b6e5e59741837e75d5c0055eaf91e4c4a53a47f229bb6
+size 29532160
diff --git a/params_shard_95.bin b/params_shard_95.bin
new file mode 100644
index 0000000000000000000000000000000000000000..47d66a019468db4ccb066c729430ae82c4ce002d
--- /dev/null
+++ b/params_shard_95.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0075a462d9c1c130e5ad40a14338b369bf1666daec8d440640b6eb5d6f75e617
+size 39321600
diff --git a/params_shard_96.bin b/params_shard_96.bin
new file mode 100644
index 0000000000000000000000000000000000000000..89c9d7aa6e4f1a4cb6ca74cc30be321ba132901e
--- /dev/null
+++ b/params_shard_96.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10c32331feb33fa450d494ca9089cdaf78ae2c0ec93615e8f1359951abb6cfd6
+size 78643200
diff --git a/params_shard_97.bin b/params_shard_97.bin
new file mode 100644
index 0000000000000000000000000000000000000000..2e8ed70c244ef1395fb3b4d4b825cd8596c8567e
--- /dev/null
+++ b/params_shard_97.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb1acef7f4e78f98fca0830af418da1c594e8d9990f9d99e9978da777139a2b4
+size 24596480
diff --git a/params_shard_98.bin b/params_shard_98.bin
new file mode 100644
index 0000000000000000000000000000000000000000..3198ba000f37c0a9b5546104d30d9a887aa33853
--- /dev/null
+++ b/params_shard_98.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff619be3ad6a6fced05a97ac7bac9b12657a3ed9f79df98d79f6f9fd996a8825
+size 39321600
diff --git a/params_shard_99.bin b/params_shard_99.bin
new file mode 100644
index 0000000000000000000000000000000000000000..8bd3c3fad8776e2f892918189041099cafdd70aa
--- /dev/null
+++ b/params_shard_99.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d4e34a877f57b8ece78c10937bdedf1f019295b59f518573c2ad28e50605f23
+size 39321600
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..4eca1a991ced7be285cfd9da4499d33b36ee4d42
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ded43118b7418f56db97a4eed08a5c265c03120158229ddd4fbcc9658241d5f0
+size 1520600
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..4f9cba4dfeaa633140028f1bf99faa07e2bb6653
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,47 @@
+{
+ "add_bos_token": false,
+ "add_eos_token": false,
+ "auto_map": {
+ "AutoTokenizer": [
+ "tokenization_orion.OrionTokenizer",
+ null
+ ]
+ },
+ "bos_token": {
+ "__type": "AddedToken",
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ },
+ "clean_up_tokenization_spaces": false,
+ "chat_template": "{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\n\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
+ "eos_token": {
+ "__type": "AddedToken",
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ },
+ "model_max_length": 4096,
+ "pad_token": {
+ "__type": "AddedToken",
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ },
+ "sp_model_kwargs": {},
+ "tokenizer_class": "OrionTokenizer",
+ "unk_token": {
+ "__type": "AddedToken",
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": true,
+ "rstrip": false,
+ "single_word": true
+ }
+}