illian01 committed
Commit 790e1b5 · 1 Parent(s): 41e502e

Add weights and config

mlc-chat-config.json ADDED
@@ -0,0 +1,74 @@
+ {
+   "version": "0.1.0",
+   "model_type": "llama",
+   "quantization": "q4f16_0",
+   "model_config": {
+     "hidden_size": 4096,
+     "intermediate_size": 11008,
+     "num_attention_heads": 32,
+     "num_hidden_layers": 12,
+     "rms_norm_eps": 1e-06,
+     "vocab_size": 32000,
+     "position_embedding_base": 10000,
+     "context_window_size": 2048,
+     "prefill_chunk_size": 2048,
+     "num_key_value_heads": 32,
+     "head_dim": 128,
+     "tensor_parallel_shards": 1,
+     "max_batch_size": 80
+   },
+   "vocab_size": 32000,
+   "context_window_size": 2048,
+   "sliding_window_size": -1,
+   "prefill_chunk_size": 2048,
+   "attention_sink_size": -1,
+   "tensor_parallel_shards": 1,
+   "temperature": 1.0,
+   "presence_penalty": 0.0,
+   "frequency_penalty": 0.0,
+   "repetition_penalty": 1.0,
+   "top_p": 1.0,
+   "tokenizer_files": [
+     "tokenizer.model",
+     "tokenizer_config.json",
+     "tokenizer.json"
+   ],
+   "tokenizer_info": {
+     "token_postproc_method": "byte_fallback",
+     "prepend_space_in_encode": true,
+     "strip_space_in_decode": true
+   },
+   "conv_template": {
+     "name": "st-llm",
+     "system_template": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{MessagePlaceholders.SYSTEM.value}</s>",
+     "system_message": "You are a helpful, respectful and honest assistant.",
+     "system_prefix_token_ids": null,
+     "add_role_after_system_message": true,
+     "roles": {
+       "user": "### Input:",
+       "assistant": "### Response:"
+     },
+     "role_templates": {
+       "user": "{user_message}",
+       "assistant": "{assistant_message}",
+       "tool": "{tool_message}"
+     },
+     "messages": [],
+     "seps": [
+       "</s>"
+     ],
+     "role_content_sep": "\n",
+     "role_empty_sep": "\n",
+     "stop_str": [
+       "</s>"
+     ],
+     "stop_token_ids": [
+       2
+     ],
+     "function_string": "",
+     "use_function_calling": false
+   },
+   "pad_token_id": 0,
+   "bos_token_id": 1,
+   "eos_token_id": 2
+ }
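
The model_config block above fully determines the quantized tensor shapes that appear in ndarray-cache.json further down. A minimal sketch in plain Python (no MLC-LLM dependency; it only assumes mlc-chat-config.json sits in the current directory) that re-derives those shapes, consistent with q4f16_0 packing 8 four-bit weights per uint32 and keeping one float16 scale per group of 32 weights:

import json

# Assumes the file added in this commit is in the working directory.
with open("mlc-chat-config.json") as f:
    cfg = json.load(f)

m = cfg["model_config"]
head_dim = m["hidden_size"] // m["num_attention_heads"]                           # 4096 // 32 = 128
qkv_cols = (m["num_attention_heads"] + 2 * m["num_key_value_heads"]) * head_dim   # 12288
gate_up_cols = 2 * m["intermediate_size"]                                          # 22016

# q4f16_0: 8 x 4-bit weights per uint32, one fp16 scale per 32 weights,
# which is why q_weight rows are hidden_size/8 = 512 and q_scale rows
# are hidden_size/32 = 128 in the shard records below.
print("head_dim     =", head_dim)
print("qkv_proj     =", m["hidden_size"] // 8, "x", qkv_cols)
print("gate_up_proj =", m["hidden_size"] // 8, "x", gate_up_cols)
print("down_proj    =", m["intermediate_size"] // 8, "x", m["hidden_size"])

The printed shapes line up with the q_weight records in ndarray-cache.json (for example, qkv_proj.q_weight is [512, 12288] and down_proj.q_weight is [1376, 4096]).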
ndarray-cache.json ADDED
@@ -0,0 +1,1719 @@
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 125,
4
+ "ParamBytes": 1513693184.0,
5
+ "BitsPerParam": 4.500437647753687
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 65536000,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.q_weight",
15
+ "shape": [
16
+ 32000,
17
+ 512
18
+ ],
19
+ "dtype": "uint32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 65536000,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "a5721d9560034ebdaf9d54846920d333"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 33357824,
31
+ "records": [
32
+ {
33
+ "name": "model.embed_tokens.q_scale",
34
+ "shape": [
35
+ 32000,
36
+ 128
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 8192000,
41
+ "byteOffset": 0
42
+ },
43
+ {
44
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
45
+ "shape": [
46
+ 512,
47
+ 12288
48
+ ],
49
+ "dtype": "uint32",
50
+ "format": "f32-to-bf16",
51
+ "nbytes": 25165824,
52
+ "byteOffset": 8192000
53
+ }
54
+ ],
55
+ "md5sum": "5552f5612454729423354be2af931df1"
56
+ },
57
+ {
58
+ "dataPath": "params_shard_2.bin",
59
+ "format": "raw-shard",
60
+ "nbytes": 45088768,
61
+ "records": [
62
+ {
63
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
64
+ "shape": [
65
+ 512,
66
+ 22016
67
+ ],
68
+ "dtype": "uint32",
69
+ "format": "f32-to-bf16",
70
+ "nbytes": 45088768,
71
+ "byteOffset": 0
72
+ }
73
+ ],
74
+ "md5sum": "ba27e916567a0c6f37f181aa8df94d1f"
75
+ },
76
+ {
77
+ "dataPath": "params_shard_3.bin",
78
+ "format": "raw-shard",
79
+ "nbytes": 22544384,
80
+ "records": [
81
+ {
82
+ "name": "model.layers.0.mlp.down_proj.q_weight",
83
+ "shape": [
84
+ 1376,
85
+ 4096
86
+ ],
87
+ "dtype": "uint32",
88
+ "format": "f32-to-bf16",
89
+ "nbytes": 22544384,
90
+ "byteOffset": 0
91
+ }
92
+ ],
93
+ "md5sum": "734f8aa85a33629a93177a47c054492e"
94
+ },
95
+ {
96
+ "dataPath": "params_shard_4.bin",
97
+ "format": "raw-shard",
98
+ "nbytes": 25165824,
99
+ "records": [
100
+ {
101
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
102
+ "shape": [
103
+ 512,
104
+ 12288
105
+ ],
106
+ "dtype": "uint32",
107
+ "format": "f32-to-bf16",
108
+ "nbytes": 25165824,
109
+ "byteOffset": 0
110
+ }
111
+ ],
112
+ "md5sum": "13ff761572920e237be3b62d59cbd4e0"
113
+ },
114
+ {
115
+ "dataPath": "params_shard_5.bin",
116
+ "format": "raw-shard",
117
+ "nbytes": 32587776,
118
+ "records": [
119
+ {
120
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
121
+ "shape": [
122
+ 128,
123
+ 12288
124
+ ],
125
+ "dtype": "float16",
126
+ "format": "f32-to-bf16",
127
+ "nbytes": 3145728,
128
+ "byteOffset": 0
129
+ },
130
+ {
131
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
132
+ "shape": [
133
+ 512,
134
+ 4096
135
+ ],
136
+ "dtype": "uint32",
137
+ "format": "f32-to-bf16",
138
+ "nbytes": 8388608,
139
+ "byteOffset": 3145728
140
+ },
141
+ {
142
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
143
+ "shape": [
144
+ 128,
145
+ 4096
146
+ ],
147
+ "dtype": "float16",
148
+ "format": "f32-to-bf16",
149
+ "nbytes": 1048576,
150
+ "byteOffset": 11534336
151
+ },
152
+ {
153
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
154
+ "shape": [
155
+ 128,
156
+ 22016
157
+ ],
158
+ "dtype": "float16",
159
+ "format": "f32-to-bf16",
160
+ "nbytes": 5636096,
161
+ "byteOffset": 12582912
162
+ },
163
+ {
164
+ "name": "model.layers.0.mlp.down_proj.q_scale",
165
+ "shape": [
166
+ 344,
167
+ 4096
168
+ ],
169
+ "dtype": "float16",
170
+ "format": "f32-to-bf16",
171
+ "nbytes": 2818048,
172
+ "byteOffset": 18219008
173
+ },
174
+ {
175
+ "name": "model.layers.0.input_layernorm.weight",
176
+ "shape": [
177
+ 4096
178
+ ],
179
+ "dtype": "float16",
180
+ "format": "f32-to-bf16",
181
+ "nbytes": 8192,
182
+ "byteOffset": 21037056
183
+ },
184
+ {
185
+ "name": "model.layers.0.post_attention_layernorm.weight",
186
+ "shape": [
187
+ 4096
188
+ ],
189
+ "dtype": "float16",
190
+ "format": "f32-to-bf16",
191
+ "nbytes": 8192,
192
+ "byteOffset": 21045248
193
+ },
194
+ {
195
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
196
+ "shape": [
197
+ 128,
198
+ 12288
199
+ ],
200
+ "dtype": "float16",
201
+ "format": "f32-to-bf16",
202
+ "nbytes": 3145728,
203
+ "byteOffset": 21053440
204
+ },
205
+ {
206
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
207
+ "shape": [
208
+ 512,
209
+ 4096
210
+ ],
211
+ "dtype": "uint32",
212
+ "format": "f32-to-bf16",
213
+ "nbytes": 8388608,
214
+ "byteOffset": 24199168
215
+ }
216
+ ],
217
+ "md5sum": "1d1b5df3e123de5bc1fbb1993222d84b"
218
+ },
219
+ {
220
+ "dataPath": "params_shard_6.bin",
221
+ "format": "raw-shard",
222
+ "nbytes": 45088768,
223
+ "records": [
224
+ {
225
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
226
+ "shape": [
227
+ 512,
228
+ 22016
229
+ ],
230
+ "dtype": "uint32",
231
+ "format": "f32-to-bf16",
232
+ "nbytes": 45088768,
233
+ "byteOffset": 0
234
+ }
235
+ ],
236
+ "md5sum": "95d00be34be16fcf3a2fe2302724e82f"
237
+ },
238
+ {
239
+ "dataPath": "params_shard_7.bin",
240
+ "format": "raw-shard",
241
+ "nbytes": 25165824,
242
+ "records": [
243
+ {
244
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
245
+ "shape": [
246
+ 512,
247
+ 12288
248
+ ],
249
+ "dtype": "uint32",
250
+ "format": "f32-to-bf16",
251
+ "nbytes": 25165824,
252
+ "byteOffset": 0
253
+ }
254
+ ],
255
+ "md5sum": "a422e8e8913d0267583354c6daa769a3"
256
+ },
257
+ {
258
+ "dataPath": "params_shard_8.bin",
259
+ "format": "raw-shard",
260
+ "nbytes": 32063488,
261
+ "records": [
262
+ {
263
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
264
+ "shape": [
265
+ 128,
266
+ 4096
267
+ ],
268
+ "dtype": "float16",
269
+ "format": "f32-to-bf16",
270
+ "nbytes": 1048576,
271
+ "byteOffset": 0
272
+ },
273
+ {
274
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
275
+ "shape": [
276
+ 128,
277
+ 22016
278
+ ],
279
+ "dtype": "float16",
280
+ "format": "f32-to-bf16",
281
+ "nbytes": 5636096,
282
+ "byteOffset": 1048576
283
+ },
284
+ {
285
+ "name": "model.layers.1.mlp.down_proj.q_weight",
286
+ "shape": [
287
+ 1376,
288
+ 4096
289
+ ],
290
+ "dtype": "uint32",
291
+ "format": "f32-to-bf16",
292
+ "nbytes": 22544384,
293
+ "byteOffset": 6684672
294
+ },
295
+ {
296
+ "name": "model.layers.1.mlp.down_proj.q_scale",
297
+ "shape": [
298
+ 344,
299
+ 4096
300
+ ],
301
+ "dtype": "float16",
302
+ "format": "f32-to-bf16",
303
+ "nbytes": 2818048,
304
+ "byteOffset": 29229056
305
+ },
306
+ {
307
+ "name": "model.layers.1.input_layernorm.weight",
308
+ "shape": [
309
+ 4096
310
+ ],
311
+ "dtype": "float16",
312
+ "format": "f32-to-bf16",
313
+ "nbytes": 8192,
314
+ "byteOffset": 32047104
315
+ },
316
+ {
317
+ "name": "model.layers.1.post_attention_layernorm.weight",
318
+ "shape": [
319
+ 4096
320
+ ],
321
+ "dtype": "float16",
322
+ "format": "f32-to-bf16",
323
+ "nbytes": 8192,
324
+ "byteOffset": 32055296
325
+ }
326
+ ],
327
+ "md5sum": "b7017825ad7f81708e314fcf9d7cb0de"
328
+ },
329
+ {
330
+ "dataPath": "params_shard_9.bin",
331
+ "format": "raw-shard",
332
+ "nbytes": 45088768,
333
+ "records": [
334
+ {
335
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
336
+ "shape": [
337
+ 512,
338
+ 22016
339
+ ],
340
+ "dtype": "uint32",
341
+ "format": "f32-to-bf16",
342
+ "nbytes": 45088768,
343
+ "byteOffset": 0
344
+ }
345
+ ],
346
+ "md5sum": "4b91662b6e1ee4e7b38296ec31b72bb1"
347
+ },
348
+ {
349
+ "dataPath": "params_shard_10.bin",
350
+ "format": "raw-shard",
351
+ "nbytes": 22544384,
352
+ "records": [
353
+ {
354
+ "name": "model.layers.2.mlp.down_proj.q_weight",
355
+ "shape": [
356
+ 1376,
357
+ 4096
358
+ ],
359
+ "dtype": "uint32",
360
+ "format": "f32-to-bf16",
361
+ "nbytes": 22544384,
362
+ "byteOffset": 0
363
+ }
364
+ ],
365
+ "md5sum": "852bbb7402bea2c0b6be3265d3d3c85b"
366
+ },
367
+ {
368
+ "dataPath": "params_shard_11.bin",
369
+ "format": "raw-shard",
370
+ "nbytes": 25165824,
371
+ "records": [
372
+ {
373
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
374
+ "shape": [
375
+ 512,
376
+ 12288
377
+ ],
378
+ "dtype": "uint32",
379
+ "format": "f32-to-bf16",
380
+ "nbytes": 25165824,
381
+ "byteOffset": 0
382
+ }
383
+ ],
384
+ "md5sum": "7e293d65ec025332caa0bbf0809fd83c"
385
+ },
386
+ {
387
+ "dataPath": "params_shard_12.bin",
388
+ "format": "raw-shard",
389
+ "nbytes": 32587776,
390
+ "records": [
391
+ {
392
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
393
+ "shape": [
394
+ 128,
395
+ 12288
396
+ ],
397
+ "dtype": "float16",
398
+ "format": "f32-to-bf16",
399
+ "nbytes": 3145728,
400
+ "byteOffset": 0
401
+ },
402
+ {
403
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
404
+ "shape": [
405
+ 512,
406
+ 4096
407
+ ],
408
+ "dtype": "uint32",
409
+ "format": "f32-to-bf16",
410
+ "nbytes": 8388608,
411
+ "byteOffset": 3145728
412
+ },
413
+ {
414
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
415
+ "shape": [
416
+ 128,
417
+ 4096
418
+ ],
419
+ "dtype": "float16",
420
+ "format": "f32-to-bf16",
421
+ "nbytes": 1048576,
422
+ "byteOffset": 11534336
423
+ },
424
+ {
425
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
426
+ "shape": [
427
+ 128,
428
+ 22016
429
+ ],
430
+ "dtype": "float16",
431
+ "format": "f32-to-bf16",
432
+ "nbytes": 5636096,
433
+ "byteOffset": 12582912
434
+ },
435
+ {
436
+ "name": "model.layers.2.mlp.down_proj.q_scale",
437
+ "shape": [
438
+ 344,
439
+ 4096
440
+ ],
441
+ "dtype": "float16",
442
+ "format": "f32-to-bf16",
443
+ "nbytes": 2818048,
444
+ "byteOffset": 18219008
445
+ },
446
+ {
447
+ "name": "model.layers.2.input_layernorm.weight",
448
+ "shape": [
449
+ 4096
450
+ ],
451
+ "dtype": "float16",
452
+ "format": "f32-to-bf16",
453
+ "nbytes": 8192,
454
+ "byteOffset": 21037056
455
+ },
456
+ {
457
+ "name": "model.layers.2.post_attention_layernorm.weight",
458
+ "shape": [
459
+ 4096
460
+ ],
461
+ "dtype": "float16",
462
+ "format": "f32-to-bf16",
463
+ "nbytes": 8192,
464
+ "byteOffset": 21045248
465
+ },
466
+ {
467
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
468
+ "shape": [
469
+ 128,
470
+ 12288
471
+ ],
472
+ "dtype": "float16",
473
+ "format": "f32-to-bf16",
474
+ "nbytes": 3145728,
475
+ "byteOffset": 21053440
476
+ },
477
+ {
478
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
479
+ "shape": [
480
+ 512,
481
+ 4096
482
+ ],
483
+ "dtype": "uint32",
484
+ "format": "f32-to-bf16",
485
+ "nbytes": 8388608,
486
+ "byteOffset": 24199168
487
+ }
488
+ ],
489
+ "md5sum": "05b55ef5f5e542eae1880b70a3058648"
490
+ },
491
+ {
492
+ "dataPath": "params_shard_13.bin",
493
+ "format": "raw-shard",
494
+ "nbytes": 45088768,
495
+ "records": [
496
+ {
497
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
498
+ "shape": [
499
+ 512,
500
+ 22016
501
+ ],
502
+ "dtype": "uint32",
503
+ "format": "f32-to-bf16",
504
+ "nbytes": 45088768,
505
+ "byteOffset": 0
506
+ }
507
+ ],
508
+ "md5sum": "7a434038b09c4c078fcf530723227ae7"
509
+ },
510
+ {
511
+ "dataPath": "params_shard_14.bin",
512
+ "format": "raw-shard",
513
+ "nbytes": 25165824,
514
+ "records": [
515
+ {
516
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
517
+ "shape": [
518
+ 512,
519
+ 12288
520
+ ],
521
+ "dtype": "uint32",
522
+ "format": "f32-to-bf16",
523
+ "nbytes": 25165824,
524
+ "byteOffset": 0
525
+ }
526
+ ],
527
+ "md5sum": "d568ff3dafce1741101c8e53b9bf5782"
528
+ },
529
+ {
530
+ "dataPath": "params_shard_15.bin",
531
+ "format": "raw-shard",
532
+ "nbytes": 32063488,
533
+ "records": [
534
+ {
535
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
536
+ "shape": [
537
+ 128,
538
+ 4096
539
+ ],
540
+ "dtype": "float16",
541
+ "format": "f32-to-bf16",
542
+ "nbytes": 1048576,
543
+ "byteOffset": 0
544
+ },
545
+ {
546
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
547
+ "shape": [
548
+ 128,
549
+ 22016
550
+ ],
551
+ "dtype": "float16",
552
+ "format": "f32-to-bf16",
553
+ "nbytes": 5636096,
554
+ "byteOffset": 1048576
555
+ },
556
+ {
557
+ "name": "model.layers.3.mlp.down_proj.q_weight",
558
+ "shape": [
559
+ 1376,
560
+ 4096
561
+ ],
562
+ "dtype": "uint32",
563
+ "format": "f32-to-bf16",
564
+ "nbytes": 22544384,
565
+ "byteOffset": 6684672
566
+ },
567
+ {
568
+ "name": "model.layers.3.mlp.down_proj.q_scale",
569
+ "shape": [
570
+ 344,
571
+ 4096
572
+ ],
573
+ "dtype": "float16",
574
+ "format": "f32-to-bf16",
575
+ "nbytes": 2818048,
576
+ "byteOffset": 29229056
577
+ },
578
+ {
579
+ "name": "model.layers.3.input_layernorm.weight",
580
+ "shape": [
581
+ 4096
582
+ ],
583
+ "dtype": "float16",
584
+ "format": "f32-to-bf16",
585
+ "nbytes": 8192,
586
+ "byteOffset": 32047104
587
+ },
588
+ {
589
+ "name": "model.layers.3.post_attention_layernorm.weight",
590
+ "shape": [
591
+ 4096
592
+ ],
593
+ "dtype": "float16",
594
+ "format": "f32-to-bf16",
595
+ "nbytes": 8192,
596
+ "byteOffset": 32055296
597
+ }
598
+ ],
599
+ "md5sum": "b8783055d9133af4581ec9c708e2e387"
600
+ },
601
+ {
602
+ "dataPath": "params_shard_16.bin",
603
+ "format": "raw-shard",
604
+ "nbytes": 45088768,
605
+ "records": [
606
+ {
607
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
608
+ "shape": [
609
+ 512,
610
+ 22016
611
+ ],
612
+ "dtype": "uint32",
613
+ "format": "f32-to-bf16",
614
+ "nbytes": 45088768,
615
+ "byteOffset": 0
616
+ }
617
+ ],
618
+ "md5sum": "f7e0999619a4b8fcf8ed2c4d3a43dc84"
619
+ },
620
+ {
621
+ "dataPath": "params_shard_17.bin",
622
+ "format": "raw-shard",
623
+ "nbytes": 22544384,
624
+ "records": [
625
+ {
626
+ "name": "model.layers.4.mlp.down_proj.q_weight",
627
+ "shape": [
628
+ 1376,
629
+ 4096
630
+ ],
631
+ "dtype": "uint32",
632
+ "format": "f32-to-bf16",
633
+ "nbytes": 22544384,
634
+ "byteOffset": 0
635
+ }
636
+ ],
637
+ "md5sum": "760ea2fa6f4a377b77a3ea3931c01a9d"
638
+ },
639
+ {
640
+ "dataPath": "params_shard_18.bin",
641
+ "format": "raw-shard",
642
+ "nbytes": 25165824,
643
+ "records": [
644
+ {
645
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
646
+ "shape": [
647
+ 512,
648
+ 12288
649
+ ],
650
+ "dtype": "uint32",
651
+ "format": "f32-to-bf16",
652
+ "nbytes": 25165824,
653
+ "byteOffset": 0
654
+ }
655
+ ],
656
+ "md5sum": "70a3d08ba73b8b54807c494e0fec7c64"
657
+ },
658
+ {
659
+ "dataPath": "params_shard_19.bin",
660
+ "format": "raw-shard",
661
+ "nbytes": 32587776,
662
+ "records": [
663
+ {
664
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
665
+ "shape": [
666
+ 128,
667
+ 12288
668
+ ],
669
+ "dtype": "float16",
670
+ "format": "f32-to-bf16",
671
+ "nbytes": 3145728,
672
+ "byteOffset": 0
673
+ },
674
+ {
675
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
676
+ "shape": [
677
+ 512,
678
+ 4096
679
+ ],
680
+ "dtype": "uint32",
681
+ "format": "f32-to-bf16",
682
+ "nbytes": 8388608,
683
+ "byteOffset": 3145728
684
+ },
685
+ {
686
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
687
+ "shape": [
688
+ 128,
689
+ 4096
690
+ ],
691
+ "dtype": "float16",
692
+ "format": "f32-to-bf16",
693
+ "nbytes": 1048576,
694
+ "byteOffset": 11534336
695
+ },
696
+ {
697
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
698
+ "shape": [
699
+ 128,
700
+ 22016
701
+ ],
702
+ "dtype": "float16",
703
+ "format": "f32-to-bf16",
704
+ "nbytes": 5636096,
705
+ "byteOffset": 12582912
706
+ },
707
+ {
708
+ "name": "model.layers.4.mlp.down_proj.q_scale",
709
+ "shape": [
710
+ 344,
711
+ 4096
712
+ ],
713
+ "dtype": "float16",
714
+ "format": "f32-to-bf16",
715
+ "nbytes": 2818048,
716
+ "byteOffset": 18219008
717
+ },
718
+ {
719
+ "name": "model.layers.4.input_layernorm.weight",
720
+ "shape": [
721
+ 4096
722
+ ],
723
+ "dtype": "float16",
724
+ "format": "f32-to-bf16",
725
+ "nbytes": 8192,
726
+ "byteOffset": 21037056
727
+ },
728
+ {
729
+ "name": "model.layers.4.post_attention_layernorm.weight",
730
+ "shape": [
731
+ 4096
732
+ ],
733
+ "dtype": "float16",
734
+ "format": "f32-to-bf16",
735
+ "nbytes": 8192,
736
+ "byteOffset": 21045248
737
+ },
738
+ {
739
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
740
+ "shape": [
741
+ 128,
742
+ 12288
743
+ ],
744
+ "dtype": "float16",
745
+ "format": "f32-to-bf16",
746
+ "nbytes": 3145728,
747
+ "byteOffset": 21053440
748
+ },
749
+ {
750
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
751
+ "shape": [
752
+ 512,
753
+ 4096
754
+ ],
755
+ "dtype": "uint32",
756
+ "format": "f32-to-bf16",
757
+ "nbytes": 8388608,
758
+ "byteOffset": 24199168
759
+ }
760
+ ],
761
+ "md5sum": "32bfcbd2bab849060a7708b6a7c733b9"
762
+ },
763
+ {
764
+ "dataPath": "params_shard_20.bin",
765
+ "format": "raw-shard",
766
+ "nbytes": 45088768,
767
+ "records": [
768
+ {
769
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
770
+ "shape": [
771
+ 512,
772
+ 22016
773
+ ],
774
+ "dtype": "uint32",
775
+ "format": "f32-to-bf16",
776
+ "nbytes": 45088768,
777
+ "byteOffset": 0
778
+ }
779
+ ],
780
+ "md5sum": "e8d3b2a91b3f9dcdbafe5429b4ef3532"
781
+ },
782
+ {
783
+ "dataPath": "params_shard_21.bin",
784
+ "format": "raw-shard",
785
+ "nbytes": 25165824,
786
+ "records": [
787
+ {
788
+ "name": "model.layers.6.self_attn.qkv_proj.q_weight",
789
+ "shape": [
790
+ 512,
791
+ 12288
792
+ ],
793
+ "dtype": "uint32",
794
+ "format": "f32-to-bf16",
795
+ "nbytes": 25165824,
796
+ "byteOffset": 0
797
+ }
798
+ ],
799
+ "md5sum": "1fa932714da6576f3d25c1d8f2383755"
800
+ },
801
+ {
802
+ "dataPath": "params_shard_22.bin",
803
+ "format": "raw-shard",
804
+ "nbytes": 32063488,
805
+ "records": [
806
+ {
807
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
808
+ "shape": [
809
+ 128,
810
+ 4096
811
+ ],
812
+ "dtype": "float16",
813
+ "format": "f32-to-bf16",
814
+ "nbytes": 1048576,
815
+ "byteOffset": 0
816
+ },
817
+ {
818
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
819
+ "shape": [
820
+ 128,
821
+ 22016
822
+ ],
823
+ "dtype": "float16",
824
+ "format": "f32-to-bf16",
825
+ "nbytes": 5636096,
826
+ "byteOffset": 1048576
827
+ },
828
+ {
829
+ "name": "model.layers.5.mlp.down_proj.q_weight",
830
+ "shape": [
831
+ 1376,
832
+ 4096
833
+ ],
834
+ "dtype": "uint32",
835
+ "format": "f32-to-bf16",
836
+ "nbytes": 22544384,
837
+ "byteOffset": 6684672
838
+ },
839
+ {
840
+ "name": "model.layers.5.mlp.down_proj.q_scale",
841
+ "shape": [
842
+ 344,
843
+ 4096
844
+ ],
845
+ "dtype": "float16",
846
+ "format": "f32-to-bf16",
847
+ "nbytes": 2818048,
848
+ "byteOffset": 29229056
849
+ },
850
+ {
851
+ "name": "model.layers.5.input_layernorm.weight",
852
+ "shape": [
853
+ 4096
854
+ ],
855
+ "dtype": "float16",
856
+ "format": "f32-to-bf16",
857
+ "nbytes": 8192,
858
+ "byteOffset": 32047104
859
+ },
860
+ {
861
+ "name": "model.layers.5.post_attention_layernorm.weight",
862
+ "shape": [
863
+ 4096
864
+ ],
865
+ "dtype": "float16",
866
+ "format": "f32-to-bf16",
867
+ "nbytes": 8192,
868
+ "byteOffset": 32055296
869
+ }
870
+ ],
871
+ "md5sum": "1192f4b46399ee1f4117a3873193a3cb"
872
+ },
873
+ {
874
+ "dataPath": "params_shard_23.bin",
875
+ "format": "raw-shard",
876
+ "nbytes": 45088768,
877
+ "records": [
878
+ {
879
+ "name": "model.layers.6.mlp.gate_up_proj.q_weight",
880
+ "shape": [
881
+ 512,
882
+ 22016
883
+ ],
884
+ "dtype": "uint32",
885
+ "format": "f32-to-bf16",
886
+ "nbytes": 45088768,
887
+ "byteOffset": 0
888
+ }
889
+ ],
890
+ "md5sum": "fff6ec2b589e78fbac6e8f71c361c792"
891
+ },
892
+ {
893
+ "dataPath": "params_shard_24.bin",
894
+ "format": "raw-shard",
895
+ "nbytes": 22544384,
896
+ "records": [
897
+ {
898
+ "name": "model.layers.6.mlp.down_proj.q_weight",
899
+ "shape": [
900
+ 1376,
901
+ 4096
902
+ ],
903
+ "dtype": "uint32",
904
+ "format": "f32-to-bf16",
905
+ "nbytes": 22544384,
906
+ "byteOffset": 0
907
+ }
908
+ ],
909
+ "md5sum": "6348bbf89535736f69b5fbaf16beaffb"
910
+ },
911
+ {
912
+ "dataPath": "params_shard_25.bin",
913
+ "format": "raw-shard",
914
+ "nbytes": 25165824,
915
+ "records": [
916
+ {
917
+ "name": "model.layers.7.self_attn.qkv_proj.q_weight",
918
+ "shape": [
919
+ 512,
920
+ 12288
921
+ ],
922
+ "dtype": "uint32",
923
+ "format": "f32-to-bf16",
924
+ "nbytes": 25165824,
925
+ "byteOffset": 0
926
+ }
927
+ ],
928
+ "md5sum": "97a5111ae4c3eb75a706632a87dbb4b6"
929
+ },
930
+ {
931
+ "dataPath": "params_shard_26.bin",
932
+ "format": "raw-shard",
933
+ "nbytes": 32587776,
934
+ "records": [
935
+ {
936
+ "name": "model.layers.6.self_attn.qkv_proj.q_scale",
937
+ "shape": [
938
+ 128,
939
+ 12288
940
+ ],
941
+ "dtype": "float16",
942
+ "format": "f32-to-bf16",
943
+ "nbytes": 3145728,
944
+ "byteOffset": 0
945
+ },
946
+ {
947
+ "name": "model.layers.6.self_attn.o_proj.q_weight",
948
+ "shape": [
949
+ 512,
950
+ 4096
951
+ ],
952
+ "dtype": "uint32",
953
+ "format": "f32-to-bf16",
954
+ "nbytes": 8388608,
955
+ "byteOffset": 3145728
956
+ },
957
+ {
958
+ "name": "model.layers.6.self_attn.o_proj.q_scale",
959
+ "shape": [
960
+ 128,
961
+ 4096
962
+ ],
963
+ "dtype": "float16",
964
+ "format": "f32-to-bf16",
965
+ "nbytes": 1048576,
966
+ "byteOffset": 11534336
967
+ },
968
+ {
969
+ "name": "model.layers.6.mlp.gate_up_proj.q_scale",
970
+ "shape": [
971
+ 128,
972
+ 22016
973
+ ],
974
+ "dtype": "float16",
975
+ "format": "f32-to-bf16",
976
+ "nbytes": 5636096,
977
+ "byteOffset": 12582912
978
+ },
979
+ {
980
+ "name": "model.layers.6.mlp.down_proj.q_scale",
981
+ "shape": [
982
+ 344,
983
+ 4096
984
+ ],
985
+ "dtype": "float16",
986
+ "format": "f32-to-bf16",
987
+ "nbytes": 2818048,
988
+ "byteOffset": 18219008
989
+ },
990
+ {
991
+ "name": "model.layers.6.input_layernorm.weight",
992
+ "shape": [
993
+ 4096
994
+ ],
995
+ "dtype": "float16",
996
+ "format": "f32-to-bf16",
997
+ "nbytes": 8192,
998
+ "byteOffset": 21037056
999
+ },
1000
+ {
1001
+ "name": "model.layers.6.post_attention_layernorm.weight",
1002
+ "shape": [
1003
+ 4096
1004
+ ],
1005
+ "dtype": "float16",
1006
+ "format": "f32-to-bf16",
1007
+ "nbytes": 8192,
1008
+ "byteOffset": 21045248
1009
+ },
1010
+ {
1011
+ "name": "model.layers.7.self_attn.qkv_proj.q_scale",
1012
+ "shape": [
1013
+ 128,
1014
+ 12288
1015
+ ],
1016
+ "dtype": "float16",
1017
+ "format": "f32-to-bf16",
1018
+ "nbytes": 3145728,
1019
+ "byteOffset": 21053440
1020
+ },
1021
+ {
1022
+ "name": "model.layers.7.self_attn.o_proj.q_weight",
1023
+ "shape": [
1024
+ 512,
1025
+ 4096
1026
+ ],
1027
+ "dtype": "uint32",
1028
+ "format": "f32-to-bf16",
1029
+ "nbytes": 8388608,
1030
+ "byteOffset": 24199168
1031
+ }
1032
+ ],
1033
+ "md5sum": "17b99865e224249e9c4a5724abef7548"
1034
+ },
1035
+ {
1036
+ "dataPath": "params_shard_27.bin",
1037
+ "format": "raw-shard",
1038
+ "nbytes": 45088768,
1039
+ "records": [
1040
+ {
1041
+ "name": "model.layers.7.mlp.gate_up_proj.q_weight",
1042
+ "shape": [
1043
+ 512,
1044
+ 22016
1045
+ ],
1046
+ "dtype": "uint32",
1047
+ "format": "f32-to-bf16",
1048
+ "nbytes": 45088768,
1049
+ "byteOffset": 0
1050
+ }
1051
+ ],
1052
+ "md5sum": "dee2f47ac2113817449ad4037f14cc63"
1053
+ },
1054
+ {
1055
+ "dataPath": "params_shard_28.bin",
1056
+ "format": "raw-shard",
1057
+ "nbytes": 25165824,
1058
+ "records": [
1059
+ {
1060
+ "name": "model.layers.8.self_attn.qkv_proj.q_weight",
1061
+ "shape": [
1062
+ 512,
1063
+ 12288
1064
+ ],
1065
+ "dtype": "uint32",
1066
+ "format": "f32-to-bf16",
1067
+ "nbytes": 25165824,
1068
+ "byteOffset": 0
1069
+ }
1070
+ ],
1071
+ "md5sum": "7a5feeebfe6442bcc4138fca9dc50b88"
1072
+ },
1073
+ {
1074
+ "dataPath": "params_shard_29.bin",
1075
+ "format": "raw-shard",
1076
+ "nbytes": 32063488,
1077
+ "records": [
1078
+ {
1079
+ "name": "model.layers.7.self_attn.o_proj.q_scale",
1080
+ "shape": [
1081
+ 128,
1082
+ 4096
1083
+ ],
1084
+ "dtype": "float16",
1085
+ "format": "f32-to-bf16",
1086
+ "nbytes": 1048576,
1087
+ "byteOffset": 0
1088
+ },
1089
+ {
1090
+ "name": "model.layers.7.mlp.gate_up_proj.q_scale",
1091
+ "shape": [
1092
+ 128,
1093
+ 22016
1094
+ ],
1095
+ "dtype": "float16",
1096
+ "format": "f32-to-bf16",
1097
+ "nbytes": 5636096,
1098
+ "byteOffset": 1048576
1099
+ },
1100
+ {
1101
+ "name": "model.layers.7.mlp.down_proj.q_weight",
1102
+ "shape": [
1103
+ 1376,
1104
+ 4096
1105
+ ],
1106
+ "dtype": "uint32",
1107
+ "format": "f32-to-bf16",
1108
+ "nbytes": 22544384,
1109
+ "byteOffset": 6684672
1110
+ },
1111
+ {
1112
+ "name": "model.layers.7.mlp.down_proj.q_scale",
1113
+ "shape": [
1114
+ 344,
1115
+ 4096
1116
+ ],
1117
+ "dtype": "float16",
1118
+ "format": "f32-to-bf16",
1119
+ "nbytes": 2818048,
1120
+ "byteOffset": 29229056
1121
+ },
1122
+ {
1123
+ "name": "model.layers.7.input_layernorm.weight",
1124
+ "shape": [
1125
+ 4096
1126
+ ],
1127
+ "dtype": "float16",
1128
+ "format": "f32-to-bf16",
1129
+ "nbytes": 8192,
1130
+ "byteOffset": 32047104
1131
+ },
1132
+ {
1133
+ "name": "model.layers.7.post_attention_layernorm.weight",
1134
+ "shape": [
1135
+ 4096
1136
+ ],
1137
+ "dtype": "float16",
1138
+ "format": "f32-to-bf16",
1139
+ "nbytes": 8192,
1140
+ "byteOffset": 32055296
1141
+ }
1142
+ ],
1143
+ "md5sum": "b00d0bdc57592edb480b7924e7c93bfa"
1144
+ },
1145
+ {
1146
+ "dataPath": "params_shard_30.bin",
1147
+ "format": "raw-shard",
1148
+ "nbytes": 45088768,
1149
+ "records": [
1150
+ {
1151
+ "name": "model.layers.8.mlp.gate_up_proj.q_weight",
1152
+ "shape": [
1153
+ 512,
1154
+ 22016
1155
+ ],
1156
+ "dtype": "uint32",
1157
+ "format": "f32-to-bf16",
1158
+ "nbytes": 45088768,
1159
+ "byteOffset": 0
1160
+ }
1161
+ ],
1162
+ "md5sum": "8d7931724b0b7b94de937af5b59bcc8f"
1163
+ },
1164
+ {
1165
+ "dataPath": "params_shard_31.bin",
1166
+ "format": "raw-shard",
1167
+ "nbytes": 22544384,
1168
+ "records": [
1169
+ {
1170
+ "name": "model.layers.8.mlp.down_proj.q_weight",
1171
+ "shape": [
1172
+ 1376,
1173
+ 4096
1174
+ ],
1175
+ "dtype": "uint32",
1176
+ "format": "f32-to-bf16",
1177
+ "nbytes": 22544384,
1178
+ "byteOffset": 0
1179
+ }
1180
+ ],
1181
+ "md5sum": "8cd54da1c291ad7d61883e5e4aba30f8"
1182
+ },
1183
+ {
1184
+ "dataPath": "params_shard_32.bin",
1185
+ "format": "raw-shard",
1186
+ "nbytes": 25165824,
1187
+ "records": [
1188
+ {
1189
+ "name": "model.layers.9.self_attn.qkv_proj.q_weight",
1190
+ "shape": [
1191
+ 512,
1192
+ 12288
1193
+ ],
1194
+ "dtype": "uint32",
1195
+ "format": "f32-to-bf16",
1196
+ "nbytes": 25165824,
1197
+ "byteOffset": 0
1198
+ }
1199
+ ],
1200
+ "md5sum": "c8caa63d672fe772e6b9a4c4146d8671"
1201
+ },
1202
+ {
1203
+ "dataPath": "params_shard_33.bin",
1204
+ "format": "raw-shard",
1205
+ "nbytes": 32587776,
1206
+ "records": [
1207
+ {
1208
+ "name": "model.layers.8.self_attn.qkv_proj.q_scale",
1209
+ "shape": [
1210
+ 128,
1211
+ 12288
1212
+ ],
1213
+ "dtype": "float16",
1214
+ "format": "f32-to-bf16",
1215
+ "nbytes": 3145728,
1216
+ "byteOffset": 0
1217
+ },
1218
+ {
1219
+ "name": "model.layers.8.self_attn.o_proj.q_weight",
1220
+ "shape": [
1221
+ 512,
1222
+ 4096
1223
+ ],
1224
+ "dtype": "uint32",
1225
+ "format": "f32-to-bf16",
1226
+ "nbytes": 8388608,
1227
+ "byteOffset": 3145728
1228
+ },
1229
+ {
1230
+ "name": "model.layers.8.self_attn.o_proj.q_scale",
1231
+ "shape": [
1232
+ 128,
1233
+ 4096
1234
+ ],
1235
+ "dtype": "float16",
1236
+ "format": "f32-to-bf16",
1237
+ "nbytes": 1048576,
1238
+ "byteOffset": 11534336
1239
+ },
1240
+ {
1241
+ "name": "model.layers.8.mlp.gate_up_proj.q_scale",
1242
+ "shape": [
1243
+ 128,
1244
+ 22016
1245
+ ],
1246
+ "dtype": "float16",
1247
+ "format": "f32-to-bf16",
1248
+ "nbytes": 5636096,
1249
+ "byteOffset": 12582912
1250
+ },
1251
+ {
1252
+ "name": "model.layers.8.mlp.down_proj.q_scale",
1253
+ "shape": [
1254
+ 344,
1255
+ 4096
1256
+ ],
1257
+ "dtype": "float16",
1258
+ "format": "f32-to-bf16",
1259
+ "nbytes": 2818048,
1260
+ "byteOffset": 18219008
1261
+ },
1262
+ {
1263
+ "name": "model.layers.8.input_layernorm.weight",
1264
+ "shape": [
1265
+ 4096
1266
+ ],
1267
+ "dtype": "float16",
1268
+ "format": "f32-to-bf16",
1269
+ "nbytes": 8192,
1270
+ "byteOffset": 21037056
1271
+ },
1272
+ {
1273
+ "name": "model.layers.8.post_attention_layernorm.weight",
1274
+ "shape": [
1275
+ 4096
1276
+ ],
1277
+ "dtype": "float16",
1278
+ "format": "f32-to-bf16",
1279
+ "nbytes": 8192,
1280
+ "byteOffset": 21045248
1281
+ },
1282
+ {
1283
+ "name": "model.layers.9.self_attn.qkv_proj.q_scale",
1284
+ "shape": [
1285
+ 128,
1286
+ 12288
1287
+ ],
1288
+ "dtype": "float16",
1289
+ "format": "f32-to-bf16",
1290
+ "nbytes": 3145728,
1291
+ "byteOffset": 21053440
1292
+ },
1293
+ {
1294
+ "name": "model.layers.9.self_attn.o_proj.q_weight",
1295
+ "shape": [
1296
+ 512,
1297
+ 4096
1298
+ ],
1299
+ "dtype": "uint32",
1300
+ "format": "f32-to-bf16",
1301
+ "nbytes": 8388608,
1302
+ "byteOffset": 24199168
1303
+ }
1304
+ ],
1305
+ "md5sum": "622ead990a574bd531048c9fe80f6b7c"
1306
+ },
1307
+ {
1308
+ "dataPath": "params_shard_34.bin",
1309
+ "format": "raw-shard",
1310
+ "nbytes": 45088768,
1311
+ "records": [
1312
+ {
1313
+ "name": "model.layers.9.mlp.gate_up_proj.q_weight",
1314
+ "shape": [
1315
+ 512,
1316
+ 22016
1317
+ ],
1318
+ "dtype": "uint32",
1319
+ "format": "f32-to-bf16",
1320
+ "nbytes": 45088768,
1321
+ "byteOffset": 0
1322
+ }
1323
+ ],
1324
+ "md5sum": "12055f1adbfb791eebbc0078fe8e5858"
1325
+ },
1326
+ {
1327
+ "dataPath": "params_shard_35.bin",
1328
+ "format": "raw-shard",
1329
+ "nbytes": 25165824,
1330
+ "records": [
1331
+ {
1332
+ "name": "model.layers.10.self_attn.qkv_proj.q_weight",
1333
+ "shape": [
1334
+ 512,
1335
+ 12288
1336
+ ],
1337
+ "dtype": "uint32",
1338
+ "format": "f32-to-bf16",
1339
+ "nbytes": 25165824,
1340
+ "byteOffset": 0
1341
+ }
1342
+ ],
1343
+ "md5sum": "ae6ac334b031863772956ded167529c6"
1344
+ },
1345
+ {
1346
+ "dataPath": "params_shard_36.bin",
1347
+ "format": "raw-shard",
1348
+ "nbytes": 32063488,
1349
+ "records": [
1350
+ {
1351
+ "name": "model.layers.9.self_attn.o_proj.q_scale",
1352
+ "shape": [
1353
+ 128,
1354
+ 4096
1355
+ ],
1356
+ "dtype": "float16",
1357
+ "format": "f32-to-bf16",
1358
+ "nbytes": 1048576,
1359
+ "byteOffset": 0
1360
+ },
1361
+ {
1362
+ "name": "model.layers.9.mlp.gate_up_proj.q_scale",
1363
+ "shape": [
1364
+ 128,
1365
+ 22016
1366
+ ],
1367
+ "dtype": "float16",
1368
+ "format": "f32-to-bf16",
1369
+ "nbytes": 5636096,
1370
+ "byteOffset": 1048576
1371
+ },
1372
+ {
1373
+ "name": "model.layers.9.mlp.down_proj.q_weight",
1374
+ "shape": [
1375
+ 1376,
1376
+ 4096
1377
+ ],
1378
+ "dtype": "uint32",
1379
+ "format": "f32-to-bf16",
1380
+ "nbytes": 22544384,
1381
+ "byteOffset": 6684672
1382
+ },
1383
+ {
1384
+ "name": "model.layers.9.mlp.down_proj.q_scale",
1385
+ "shape": [
1386
+ 344,
1387
+ 4096
1388
+ ],
1389
+ "dtype": "float16",
1390
+ "format": "f32-to-bf16",
1391
+ "nbytes": 2818048,
1392
+ "byteOffset": 29229056
1393
+ },
1394
+ {
1395
+ "name": "model.layers.9.input_layernorm.weight",
1396
+ "shape": [
1397
+ 4096
1398
+ ],
1399
+ "dtype": "float16",
1400
+ "format": "f32-to-bf16",
1401
+ "nbytes": 8192,
1402
+ "byteOffset": 32047104
1403
+ },
1404
+ {
1405
+ "name": "model.layers.9.post_attention_layernorm.weight",
1406
+ "shape": [
1407
+ 4096
1408
+ ],
1409
+ "dtype": "float16",
1410
+ "format": "f32-to-bf16",
1411
+ "nbytes": 8192,
1412
+ "byteOffset": 32055296
1413
+ }
1414
+ ],
1415
+ "md5sum": "ae02657a6091f73cee639fad0a51c79c"
1416
+ },
1417
+ {
1418
+ "dataPath": "params_shard_37.bin",
1419
+ "format": "raw-shard",
1420
+ "nbytes": 45088768,
1421
+ "records": [
1422
+ {
1423
+ "name": "model.layers.10.mlp.gate_up_proj.q_weight",
1424
+ "shape": [
1425
+ 512,
1426
+ 22016
1427
+ ],
1428
+ "dtype": "uint32",
1429
+ "format": "f32-to-bf16",
1430
+ "nbytes": 45088768,
1431
+ "byteOffset": 0
1432
+ }
1433
+ ],
1434
+ "md5sum": "2cfacefc2fa03ee433b1f6cefc838b4c"
1435
+ },
1436
+ {
1437
+ "dataPath": "params_shard_38.bin",
1438
+ "format": "raw-shard",
1439
+ "nbytes": 22544384,
1440
+ "records": [
1441
+ {
1442
+ "name": "model.layers.10.mlp.down_proj.q_weight",
1443
+ "shape": [
1444
+ 1376,
1445
+ 4096
1446
+ ],
1447
+ "dtype": "uint32",
1448
+ "format": "f32-to-bf16",
1449
+ "nbytes": 22544384,
1450
+ "byteOffset": 0
1451
+ }
1452
+ ],
1453
+ "md5sum": "ef213324fb1862847cc91132225feb63"
1454
+ },
1455
+ {
1456
+ "dataPath": "params_shard_39.bin",
1457
+ "format": "raw-shard",
1458
+ "nbytes": 25165824,
1459
+ "records": [
1460
+ {
1461
+ "name": "model.layers.11.self_attn.qkv_proj.q_weight",
1462
+ "shape": [
1463
+ 512,
1464
+ 12288
1465
+ ],
1466
+ "dtype": "uint32",
1467
+ "format": "f32-to-bf16",
1468
+ "nbytes": 25165824,
1469
+ "byteOffset": 0
1470
+ }
1471
+ ],
1472
+ "md5sum": "f9c0976e25f3666e26528100ac3b1692"
1473
+ },
1474
+ {
1475
+ "dataPath": "params_shard_40.bin",
1476
+ "format": "raw-shard",
1477
+ "nbytes": 32587776,
1478
+ "records": [
1479
+ {
1480
+ "name": "model.layers.10.self_attn.qkv_proj.q_scale",
1481
+ "shape": [
1482
+ 128,
1483
+ 12288
1484
+ ],
1485
+ "dtype": "float16",
1486
+ "format": "f32-to-bf16",
1487
+ "nbytes": 3145728,
1488
+ "byteOffset": 0
1489
+ },
1490
+ {
1491
+ "name": "model.layers.10.self_attn.o_proj.q_weight",
1492
+ "shape": [
1493
+ 512,
1494
+ 4096
1495
+ ],
1496
+ "dtype": "uint32",
1497
+ "format": "f32-to-bf16",
1498
+ "nbytes": 8388608,
1499
+ "byteOffset": 3145728
1500
+ },
1501
+ {
1502
+ "name": "model.layers.10.self_attn.o_proj.q_scale",
1503
+ "shape": [
1504
+ 128,
1505
+ 4096
1506
+ ],
1507
+ "dtype": "float16",
1508
+ "format": "f32-to-bf16",
1509
+ "nbytes": 1048576,
1510
+ "byteOffset": 11534336
1511
+ },
1512
+ {
1513
+ "name": "model.layers.10.mlp.gate_up_proj.q_scale",
1514
+ "shape": [
1515
+ 128,
1516
+ 22016
1517
+ ],
1518
+ "dtype": "float16",
1519
+ "format": "f32-to-bf16",
1520
+ "nbytes": 5636096,
1521
+ "byteOffset": 12582912
1522
+ },
1523
+ {
1524
+ "name": "model.layers.10.mlp.down_proj.q_scale",
1525
+ "shape": [
1526
+ 344,
1527
+ 4096
1528
+ ],
1529
+ "dtype": "float16",
1530
+ "format": "f32-to-bf16",
1531
+ "nbytes": 2818048,
1532
+ "byteOffset": 18219008
1533
+ },
1534
+ {
1535
+ "name": "model.layers.10.input_layernorm.weight",
1536
+ "shape": [
1537
+ 4096
1538
+ ],
1539
+ "dtype": "float16",
1540
+ "format": "f32-to-bf16",
1541
+ "nbytes": 8192,
1542
+ "byteOffset": 21037056
1543
+ },
1544
+ {
1545
+ "name": "model.layers.10.post_attention_layernorm.weight",
1546
+ "shape": [
1547
+ 4096
1548
+ ],
1549
+ "dtype": "float16",
1550
+ "format": "f32-to-bf16",
1551
+ "nbytes": 8192,
1552
+ "byteOffset": 21045248
1553
+ },
1554
+ {
1555
+ "name": "model.layers.11.self_attn.qkv_proj.q_scale",
1556
+ "shape": [
1557
+ 128,
1558
+ 12288
1559
+ ],
1560
+ "dtype": "float16",
1561
+ "format": "f32-to-bf16",
1562
+ "nbytes": 3145728,
1563
+ "byteOffset": 21053440
1564
+ },
1565
+ {
1566
+ "name": "model.layers.11.self_attn.o_proj.q_weight",
1567
+ "shape": [
1568
+ 512,
1569
+ 4096
1570
+ ],
1571
+ "dtype": "uint32",
1572
+ "format": "f32-to-bf16",
1573
+ "nbytes": 8388608,
1574
+ "byteOffset": 24199168
1575
+ }
1576
+ ],
1577
+ "md5sum": "05237e2904fb62b138e1e6ea5c3496cd"
1578
+ },
1579
+ {
1580
+ "dataPath": "params_shard_41.bin",
1581
+ "format": "raw-shard",
1582
+ "nbytes": 45088768,
1583
+ "records": [
1584
+ {
1585
+ "name": "model.layers.11.mlp.gate_up_proj.q_weight",
1586
+ "shape": [
1587
+ 512,
1588
+ 22016
1589
+ ],
1590
+ "dtype": "uint32",
1591
+ "format": "f32-to-bf16",
1592
+ "nbytes": 45088768,
1593
+ "byteOffset": 0
1594
+ }
1595
+ ],
1596
+ "md5sum": "d309b28471e4124e03531f6806631ce5"
1597
+ },
1598
+ {
1599
+ "dataPath": "params_shard_42.bin",
1600
+ "format": "raw-shard",
1601
+ "nbytes": 65536000,
1602
+ "records": [
1603
+ {
1604
+ "name": "lm_head.q_weight",
1605
+ "shape": [
1606
+ 512,
1607
+ 32000
1608
+ ],
1609
+ "dtype": "uint32",
1610
+ "format": "f32-to-bf16",
1611
+ "nbytes": 65536000,
1612
+ "byteOffset": 0
1613
+ }
1614
+ ],
1615
+ "md5sum": "ede184766099ed6415755f6629949e57"
1616
+ },
1617
+ {
1618
+ "dataPath": "params_shard_43.bin",
1619
+ "format": "raw-shard",
1620
+ "nbytes": 32071680,
1621
+ "records": [
1622
+ {
1623
+ "name": "model.layers.11.self_attn.o_proj.q_scale",
1624
+ "shape": [
1625
+ 128,
1626
+ 4096
1627
+ ],
1628
+ "dtype": "float16",
1629
+ "format": "f32-to-bf16",
1630
+ "nbytes": 1048576,
1631
+ "byteOffset": 0
1632
+ },
1633
+ {
1634
+ "name": "model.layers.11.mlp.gate_up_proj.q_scale",
1635
+ "shape": [
1636
+ 128,
1637
+ 22016
1638
+ ],
1639
+ "dtype": "float16",
1640
+ "format": "f32-to-bf16",
1641
+ "nbytes": 5636096,
1642
+ "byteOffset": 1048576
1643
+ },
1644
+ {
1645
+ "name": "model.layers.11.mlp.down_proj.q_weight",
1646
+ "shape": [
1647
+ 1376,
1648
+ 4096
1649
+ ],
1650
+ "dtype": "uint32",
1651
+ "format": "f32-to-bf16",
1652
+ "nbytes": 22544384,
1653
+ "byteOffset": 6684672
1654
+ },
1655
+ {
1656
+ "name": "model.layers.11.mlp.down_proj.q_scale",
1657
+ "shape": [
1658
+ 344,
1659
+ 4096
1660
+ ],
1661
+ "dtype": "float16",
1662
+ "format": "f32-to-bf16",
1663
+ "nbytes": 2818048,
1664
+ "byteOffset": 29229056
1665
+ },
1666
+ {
1667
+ "name": "model.layers.11.input_layernorm.weight",
1668
+ "shape": [
1669
+ 4096
1670
+ ],
1671
+ "dtype": "float16",
1672
+ "format": "f32-to-bf16",
1673
+ "nbytes": 8192,
1674
+ "byteOffset": 32047104
1675
+ },
1676
+ {
1677
+ "name": "model.layers.11.post_attention_layernorm.weight",
1678
+ "shape": [
1679
+ 4096
1680
+ ],
1681
+ "dtype": "float16",
1682
+ "format": "f32-to-bf16",
1683
+ "nbytes": 8192,
1684
+ "byteOffset": 32055296
1685
+ },
1686
+ {
1687
+ "name": "model.norm.weight",
1688
+ "shape": [
1689
+ 4096
1690
+ ],
1691
+ "dtype": "float16",
1692
+ "format": "f32-to-bf16",
1693
+ "nbytes": 8192,
1694
+ "byteOffset": 32063488
1695
+ }
1696
+ ],
1697
+ "md5sum": "89d5e83b4da202f7bec532285ca1e80e"
1698
+ },
1699
+ {
1700
+ "dataPath": "params_shard_44.bin",
1701
+ "format": "raw-shard",
1702
+ "nbytes": 8192000,
1703
+ "records": [
1704
+ {
1705
+ "name": "lm_head.q_scale",
1706
+ "shape": [
1707
+ 128,
1708
+ 32000
1709
+ ],
1710
+ "dtype": "float16",
1711
+ "format": "f32-to-bf16",
1712
+ "nbytes": 8192000,
1713
+ "byteOffset": 0
1714
+ }
1715
+ ],
1716
+ "md5sum": "3503f06e614550cba195b922d158f64c"
1717
+ }
1718
+ ]
1719
+ }
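
The 1719-line index above splits the 125 quantized parameters across 45 raw shards, each with an md5sum and back-to-back record offsets. A short consistency check in plain Python (assumes ndarray-cache.json and any downloaded params_shard_*.bin files are in the current directory; shards not yet fetched from LFS are simply skipped):

import hashlib
import json
import os

with open("ndarray-cache.json") as f:
    cache = json.load(f)

print("params:", cache["metadata"]["ParamSize"],
      "bytes:", int(cache["metadata"]["ParamBytes"]))

for shard in cache["records"]:
    # Records should tile the shard exactly, with no gaps or overlap.
    offset = 0
    for rec in shard["records"]:
        assert rec["byteOffset"] == offset, (shard["dataPath"], rec["name"])
        offset += rec["nbytes"]
    assert offset == shard["nbytes"], shard["dataPath"]

    # Optional integrity check against the recorded md5sum.
    if os.path.exists(shard["dataPath"]):
        with open(shard["dataPath"], "rb") as bin_file:
            digest = hashlib.md5(bin_file.read()).hexdigest()
        assert digest == shard["md5sum"], shard["dataPath"]

print("ndarray-cache.json is internally consistent")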
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba2015d1236070535adc75517a2e64b5d0ce66ecc0c66579c6b3ab68ad83dd22
3
+ size 65536000
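
Each params_shard_*.bin added here is stored as a Git LFS pointer rather than the binary itself: the three lines record the pointer-spec version, the SHA-256 of the real payload, and its size in bytes. After fetching the actual weights (for example with git lfs pull), a shard can be checked against its pointer with a sketch like the following (plain Python; the oid and size are copied from the params_shard_0.bin pointer above, and the path is assumed):

import hashlib
from pathlib import Path

# Values taken from the LFS pointer shown above.
EXPECTED_OID = "ba2015d1236070535adc75517a2e64b5d0ce66ecc0c66579c6b3ab68ad83dd22"
EXPECTED_SIZE = 65536000

# Assumes the real shard has replaced the pointer in the working tree.
blob = Path("params_shard_0.bin").read_bytes()

assert len(blob) == EXPECTED_SIZE, f"size mismatch: {len(blob)}"
assert hashlib.sha256(blob).hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("params_shard_0.bin matches its LFS pointer")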
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db1a53432b403c9ad2a61464d0b7d4dc2a631135958977a88c363bb62717328f
3
+ size 33357824
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e5781d8ee1085ec94121be7281d92852ef6ea5e270ffaf294065ed89ceb1665
3
+ size 22544384
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f60b771d842644a17bb17fc400edad6369d3ad2a8c669cdf2a8a399323b70f10
3
+ size 25165824
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7960c50b39046b762c08b45782bd759b9f0ec503ce889d742954c6ec27aca2aa
3
+ size 32587776
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c55a09ad912a3e2f7069b1e0be5b4d0b761b78cfdd82ccd0705b8944db567d0
3
+ size 45088768
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdc57a47f4f60ef72e6f98f18ba4a01e4cbe9ea87608a34bd837a3a39cc3f692
3
+ size 25165824
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c73ab632b94e24daca4ab121f3777914c8d5443d4634aa37b7ee9b9ad8f714e
3
+ size 32063488
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44e04dd1b99ed61520d865cdaf0924e8d3560aa3eb14d1efd65cdb6e58d3ab5b
3
+ size 45088768
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f39d310200922ccc58be027dd1a006411153bf0c0f7cc9c3582710a386df4563
3
+ size 22544384
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95eed2416640f44ee90dd326bdda5df73e5c5080fed0065ded31594e96d55b5f
3
+ size 25165824
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cb7a2405b2a58d3fab3599909fc66cc6363ca0a186b77cdf300de367b0c71ca
3
+ size 32587776
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:676892bbd934eac5843405f04884b4a9e073f59c94f64791fc5d9b3b93dc4a7c
3
+ size 45088768
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6484cd2e220ca9847b5e363529e282fd7e844c99d3b5794fabf15f26b1f0e14f
3
+ size 45088768
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1ac3681330ffea53a2c3b1b572112cda2e803ff22f156220328e35bc33d0006
3
+ size 25165824
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb9d361651a60c3aaf581de6546f3ed200e8dfafd8669c3c96a0bd8565c2a2b7
3
+ size 32063488
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e616ddccfc29f943d0867165f9965c15a36fef5dcad44ecb53d5d089a6b100c
3
+ size 45088768
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab80a0dc0a084a222ed1d43446a4495ed04b681e7ec19c34c642034847d2c9f5
3
+ size 22544384
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:144768aa3b03fff287569b98fb04ca1cd27508a6e0036bf3ab8d5804720ebf08
3
+ size 25165824
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:854db9df2f34751a09517f3f449be32020b580514c47da86fc8aeffda2968025
3
+ size 32587776
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a803ecd2adebcfd2bcb54fdbe016207dbe5a021ac5e827dbe99e0d395c21be9c
3
+ size 45088768
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49fd3e7eb92923c60b68265982448d69d7126b0646d0d1b59874330d7410d88c
3
+ size 25165824
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2463bfaeaa61e13e1e82eeb5f36df2a9d7a92284e9952b9dd2d586d9b918f1d4
3
+ size 32063488
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bfcac35dfcdee24fea58d0be7d99fc49766cf1231e70fa0a2ab7ad57c216cc43
3
+ size 22544384
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dd60d37278293d574f0fd24143127f745a3ccafc71255d9188b7a4aee965c56
3
+ size 45088768
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d54f747cd297332a2a72f8a27318300219a2ac4f7f7a581f19a73ccc63e53eee
3
+ size 22544384
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:075b097be957f9fd8ad5bfe31e7e95853c93a5f7373f348ed33ab9c8cd585337
3
+ size 25165824
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5808a1d117afe04c01be033c9314411a9717e1d1c9bcbeeff90c57d1d7ca9b2e
3
+ size 32587776
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d59fc48707b005058d1f0f20348a230463e593d7800e984839a6fc412b996df
3
+ size 45088768
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed8f7118d918cea235017c45d070e0b70f5f9bfbb8036953b900b6fa35d38bd3
3
+ size 25165824
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46fb826b4294e2559503b3fc6150bfa8e97a61ce1584de3c5c86211cbde42325
3
+ size 32063488
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86aefb8b40d22c668d010e934a39d3479eec4c5a5e22db0a68289e0e343b6e65
3
+ size 45088768
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecb8318ae7e6477d17b032c4d3369e0663ae6655467280a052354ce80bd14a67
3
+ size 22544384
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8b6ba37e825ae90a17df2a531f64a57cbffd5ac0ae7da30e10c80c02ec45a6c
3
+ size 25165824
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4a53a73c4c289a19caa9de0b1bc6ae9a3cbef1c444977b9297c24ba3e4503fc
3
+ size 25165824
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98e8d5f6891ce79e551474c7f53a4b87569905cbe8b0e1bacb25be8445a3cc21
3
+ size 32587776
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14cd523b325aa32c39d501f1ecf4fb35773caa68a0cf1eada7cd541008c51c55
3
+ size 45088768
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b20e0014cc768c7d8233ffdf762f155831651cd02776d1f50879d2faa7d99bc3
3
+ size 65536000
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d90d84007821774bcfdea7996f15748d352e8125a58dfbb800df335cb338271a
3
+ size 32071680
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:499986ba5aaaa49b3bef7d068d2ea815b7308bfb5f284bf602315036fc7bf733
3
+ size 8192000
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7be4d3feab3c623564eda3293e7a90627ab182de5bd25f2a94bde9eb3a6d1ef7
3
+ size 32587776
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:676889e1fe38ec0aa1778ccc92b4b440d9c9d3595bbb9cb530f73078454764a1
3
+ size 45088768
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14510f3914beef6f097af95f3147d05db1347637b9c4c79a15f0dafa55a39220
3
+ size 25165824
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7684d2c16ebe51d3536e2b19a5502b9502b7205ac9dfca1a993b31f189e8dcc2
3
+ size 32063488
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8492319ed9aef8613d91623623d96a265ec0ce83b724ca6e6a971c7e83bbd231
3
+ size 45088768
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
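
tokenizer.model (another LFS pointer) is the SentencePiece model listed under tokenizer_files in mlc-chat-config.json and is used with byte_fallback post-processing. A quick round-trip sketch, assuming the sentencepiece Python package is installed and the real file has been pulled from LFS:

import sentencepiece as spm

# Assumes tokenizer.model has been fetched (it is an LFS pointer in this commit).
sp = spm.SentencePieceProcessor(model_file="tokenizer.model")

print(sp.vocab_size())            # expected 32000, per mlc-chat-config.json
ids = sp.encode("Hello, world!")  # text -> token ids
print(ids)
print(sp.decode(ids))             # token ids -> text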
tokenizer_config.json ADDED
@@ -0,0 +1,35 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": false,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "legacy": true,
+   "model_max_length": 2048,
+   "pad_token": null,
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
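
Together with the special tokens above, the st-llm conv_template in mlc-chat-config.json describes an Alpaca-style prompt: an ### Instruction system block, an ### Input user turn, and an open ### Response turn, separated by </s>. The sketch below is only an illustration of that layout assembled by hand from the config fields (MLC-LLM builds the prompt internally, and the exact spacing between segments may differ):

# Hand-assembled prompt in the layout described by the "st-llm" conv_template.
# The config's {MessagePlaceholders.SYSTEM.value} placeholder is filled with
# the config's system_message here.
SYSTEM_TEMPLATE = (
    "Below is an instruction that describes a task, paired with an input that "
    "provides further context. Write a response that appropriately completes "
    "the request.\n\n### Instruction:\n{system_message}</s>"
)
SYSTEM_MESSAGE = "You are a helpful, respectful and honest assistant."
SEP = "</s>"  # also the stop string; stop_token_ids is [2], the eos token


def build_prompt(user_message: str) -> str:
    system = SYSTEM_TEMPLATE.format(system_message=SYSTEM_MESSAGE)
    user = "### Input:\n" + user_message + SEP   # role_content_sep is "\n"
    assistant = "### Response:\n"                # role_empty_sep, left open for generation
    return system + user + assistant


print(build_prompt("Summarize what this commit adds to the repository."))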