illian01 committed on
Commit
d89e317
·
1 Parent(s): d69b026

Add weights and config

Browse files
mlc-chat-config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "llama",
4
+ "quantization": "q0f16",
5
+ "model_config": {
6
+ "hidden_size": 4096,
7
+ "intermediate_size": 11008,
8
+ "num_attention_heads": 32,
9
+ "num_hidden_layers": 6,
10
+ "rms_norm_eps": 1e-06,
11
+ "vocab_size": 32000,
12
+ "position_embedding_base": 10000,
13
+ "context_window_size": 2048,
14
+ "prefill_chunk_size": 2048,
15
+ "num_key_value_heads": 32,
16
+ "head_dim": 128,
17
+ "tensor_parallel_shards": 1,
18
+ "max_batch_size": 80
19
+ },
20
+ "vocab_size": 32000,
21
+ "context_window_size": 2048,
22
+ "sliding_window_size": -1,
23
+ "prefill_chunk_size": 2048,
24
+ "attention_sink_size": -1,
25
+ "tensor_parallel_shards": 1,
26
+ "mean_gen_len": 128,
27
+ "max_gen_len": 512,
28
+ "shift_fill_factor": 0.3,
29
+ "temperature": 0.6,
30
+ "presence_penalty": 0.0,
31
+ "frequency_penalty": 0.0,
32
+ "repetition_penalty": 1.0,
33
+ "top_p": 0.9,
34
+ "tokenizer_files": [
35
+ "tokenizer.model",
36
+ "tokenizer_config.json",
37
+ "tokenizer.json"
38
+ ],
39
+ "tokenizer_info": {
40
+ "token_postproc_method": "byte_fallback",
41
+ "prepend_space_in_encode": true,
42
+ "strip_space_in_decode": true
43
+ },
44
+ "conv_template": {
45
+ "name": "st-llm",
46
+ "system_template": "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{system_message}</s>",
47
+ "system_message": "You are a helpful, respectful and honest assistant.",
48
+ "system_prefix_token_ids": null,
49
+ "add_role_after_system_message": true,
50
+ "roles": {
51
+ "user": "### Input:",
52
+ "assistant": "### Response:"
53
+ },
54
+ "role_templates": {
55
+ "user": "{user_message}",
56
+ "assistant": "{assistant_message}",
57
+ "tool": "{tool_message}"
58
+ },
59
+ "messages": [],
60
+ "seps": [
61
+ "</s>"
62
+ ],
63
+ "role_content_sep": "\n",
64
+ "role_empty_sep": "\n",
65
+ "stop_str": [
66
+ "</s>"
67
+ ],
68
+ "stop_token_ids": [
69
+ 2
70
+ ],
71
+ "function_string": "",
72
+ "use_function_calling": false
73
+ },
74
+ "pad_token_id": 0,
75
+ "bos_token_id": 1,
76
+ "eos_token_id": 2
77
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,641 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 39,
4
+ "ParamBytes": 2952896512.0,
5
+ "BitsPerParam": 16.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 262144000,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.weight",
15
+ "shape": [
16
+ 32000,
17
+ 4096
18
+ ],
19
+ "dtype": "float16",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 262144000,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "03a65d61de7a52c70907ebf3fa9758aa"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 100663296,
31
+ "records": [
32
+ {
33
+ "name": "model.layers.0.self_attn.qkv_proj.weight",
34
+ "shape": [
35
+ 12288,
36
+ 4096
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 100663296,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "d12e6a357b596ce92179d4ea4b19ab72"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 33554432,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.0.self_attn.o_proj.weight",
53
+ "shape": [
54
+ 4096,
55
+ 4096
56
+ ],
57
+ "dtype": "float16",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 33554432,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "5fa24d0ee40a80c8a62a7bf90dfaa8f5"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 180355072,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
72
+ "shape": [
73
+ 22016,
74
+ 4096
75
+ ],
76
+ "dtype": "float16",
77
+ "format": "f32-to-bf16",
78
+ "nbytes": 180355072,
79
+ "byteOffset": 0
80
+ }
81
+ ],
82
+ "md5sum": "f88725746b4a861b10f490577197c64e"
83
+ },
84
+ {
85
+ "dataPath": "params_shard_4.bin",
86
+ "format": "raw-shard",
87
+ "nbytes": 90177536,
88
+ "records": [
89
+ {
90
+ "name": "model.layers.0.mlp.down_proj.weight",
91
+ "shape": [
92
+ 4096,
93
+ 11008
94
+ ],
95
+ "dtype": "float16",
96
+ "format": "f32-to-bf16",
97
+ "nbytes": 90177536,
98
+ "byteOffset": 0
99
+ }
100
+ ],
101
+ "md5sum": "052c48f1fb0bdd535ecf92fe344164c0"
102
+ },
103
+ {
104
+ "dataPath": "params_shard_5.bin",
105
+ "format": "raw-shard",
106
+ "nbytes": 100663296,
107
+ "records": [
108
+ {
109
+ "name": "model.layers.1.self_attn.qkv_proj.weight",
110
+ "shape": [
111
+ 12288,
112
+ 4096
113
+ ],
114
+ "dtype": "float16",
115
+ "format": "f32-to-bf16",
116
+ "nbytes": 100663296,
117
+ "byteOffset": 0
118
+ }
119
+ ],
120
+ "md5sum": "86963edeb8ff7b9f21c95725a2a0d92c"
121
+ },
122
+ {
123
+ "dataPath": "params_shard_6.bin",
124
+ "format": "raw-shard",
125
+ "nbytes": 33554432,
126
+ "records": [
127
+ {
128
+ "name": "model.layers.1.self_attn.o_proj.weight",
129
+ "shape": [
130
+ 4096,
131
+ 4096
132
+ ],
133
+ "dtype": "float16",
134
+ "format": "f32-to-bf16",
135
+ "nbytes": 33554432,
136
+ "byteOffset": 0
137
+ }
138
+ ],
139
+ "md5sum": "942a2dc4de7d4f5f2b8146b9c411b25f"
140
+ },
141
+ {
142
+ "dataPath": "params_shard_7.bin",
143
+ "format": "raw-shard",
144
+ "nbytes": 180355072,
145
+ "records": [
146
+ {
147
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
148
+ "shape": [
149
+ 22016,
150
+ 4096
151
+ ],
152
+ "dtype": "float16",
153
+ "format": "f32-to-bf16",
154
+ "nbytes": 180355072,
155
+ "byteOffset": 0
156
+ }
157
+ ],
158
+ "md5sum": "3aec97e2be4c4551e98c249eee9a90b9"
159
+ },
160
+ {
161
+ "dataPath": "params_shard_8.bin",
162
+ "format": "raw-shard",
163
+ "nbytes": 90177536,
164
+ "records": [
165
+ {
166
+ "name": "model.layers.1.mlp.down_proj.weight",
167
+ "shape": [
168
+ 4096,
169
+ 11008
170
+ ],
171
+ "dtype": "float16",
172
+ "format": "f32-to-bf16",
173
+ "nbytes": 90177536,
174
+ "byteOffset": 0
175
+ }
176
+ ],
177
+ "md5sum": "0680dc1b0beff0cd8e3fb2b7189b65c3"
178
+ },
179
+ {
180
+ "dataPath": "params_shard_9.bin",
181
+ "format": "raw-shard",
182
+ "nbytes": 100663296,
183
+ "records": [
184
+ {
185
+ "name": "model.layers.2.self_attn.qkv_proj.weight",
186
+ "shape": [
187
+ 12288,
188
+ 4096
189
+ ],
190
+ "dtype": "float16",
191
+ "format": "f32-to-bf16",
192
+ "nbytes": 100663296,
193
+ "byteOffset": 0
194
+ }
195
+ ],
196
+ "md5sum": "51e3c754050f45feb09a64ff92d67992"
197
+ },
198
+ {
199
+ "dataPath": "params_shard_10.bin",
200
+ "format": "raw-shard",
201
+ "nbytes": 33554432,
202
+ "records": [
203
+ {
204
+ "name": "model.layers.2.self_attn.o_proj.weight",
205
+ "shape": [
206
+ 4096,
207
+ 4096
208
+ ],
209
+ "dtype": "float16",
210
+ "format": "f32-to-bf16",
211
+ "nbytes": 33554432,
212
+ "byteOffset": 0
213
+ }
214
+ ],
215
+ "md5sum": "d68c5af73f2968c99e967f69ab1cc997"
216
+ },
217
+ {
218
+ "dataPath": "params_shard_11.bin",
219
+ "format": "raw-shard",
220
+ "nbytes": 180355072,
221
+ "records": [
222
+ {
223
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
224
+ "shape": [
225
+ 22016,
226
+ 4096
227
+ ],
228
+ "dtype": "float16",
229
+ "format": "f32-to-bf16",
230
+ "nbytes": 180355072,
231
+ "byteOffset": 0
232
+ }
233
+ ],
234
+ "md5sum": "981fa5ab1c0cf50b47e8d26947039d4b"
235
+ },
236
+ {
237
+ "dataPath": "params_shard_12.bin",
238
+ "format": "raw-shard",
239
+ "nbytes": 90177536,
240
+ "records": [
241
+ {
242
+ "name": "model.layers.2.mlp.down_proj.weight",
243
+ "shape": [
244
+ 4096,
245
+ 11008
246
+ ],
247
+ "dtype": "float16",
248
+ "format": "f32-to-bf16",
249
+ "nbytes": 90177536,
250
+ "byteOffset": 0
251
+ }
252
+ ],
253
+ "md5sum": "779e5926260fa5d3f2beedab6b9d3c38"
254
+ },
255
+ {
256
+ "dataPath": "params_shard_13.bin",
257
+ "format": "raw-shard",
258
+ "nbytes": 100663296,
259
+ "records": [
260
+ {
261
+ "name": "model.layers.3.self_attn.qkv_proj.weight",
262
+ "shape": [
263
+ 12288,
264
+ 4096
265
+ ],
266
+ "dtype": "float16",
267
+ "format": "f32-to-bf16",
268
+ "nbytes": 100663296,
269
+ "byteOffset": 0
270
+ }
271
+ ],
272
+ "md5sum": "105ee9f3e172f4052971b7cdef5a8bcc"
273
+ },
274
+ {
275
+ "dataPath": "params_shard_14.bin",
276
+ "format": "raw-shard",
277
+ "nbytes": 33554432,
278
+ "records": [
279
+ {
280
+ "name": "model.layers.3.self_attn.o_proj.weight",
281
+ "shape": [
282
+ 4096,
283
+ 4096
284
+ ],
285
+ "dtype": "float16",
286
+ "format": "f32-to-bf16",
287
+ "nbytes": 33554432,
288
+ "byteOffset": 0
289
+ }
290
+ ],
291
+ "md5sum": "556c5286ca315c99172e4232aedd4515"
292
+ },
293
+ {
294
+ "dataPath": "params_shard_15.bin",
295
+ "format": "raw-shard",
296
+ "nbytes": 180355072,
297
+ "records": [
298
+ {
299
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
300
+ "shape": [
301
+ 22016,
302
+ 4096
303
+ ],
304
+ "dtype": "float16",
305
+ "format": "f32-to-bf16",
306
+ "nbytes": 180355072,
307
+ "byteOffset": 0
308
+ }
309
+ ],
310
+ "md5sum": "dbbf47ddc8ebcfc1f81a8346c0da5283"
311
+ },
312
+ {
313
+ "dataPath": "params_shard_16.bin",
314
+ "format": "raw-shard",
315
+ "nbytes": 90177536,
316
+ "records": [
317
+ {
318
+ "name": "model.layers.3.mlp.down_proj.weight",
319
+ "shape": [
320
+ 4096,
321
+ 11008
322
+ ],
323
+ "dtype": "float16",
324
+ "format": "f32-to-bf16",
325
+ "nbytes": 90177536,
326
+ "byteOffset": 0
327
+ }
328
+ ],
329
+ "md5sum": "dcc685b20ab52daaa91da66f35b9ce23"
330
+ },
331
+ {
332
+ "dataPath": "params_shard_17.bin",
333
+ "format": "raw-shard",
334
+ "nbytes": 100663296,
335
+ "records": [
336
+ {
337
+ "name": "model.layers.4.self_attn.qkv_proj.weight",
338
+ "shape": [
339
+ 12288,
340
+ 4096
341
+ ],
342
+ "dtype": "float16",
343
+ "format": "f32-to-bf16",
344
+ "nbytes": 100663296,
345
+ "byteOffset": 0
346
+ }
347
+ ],
348
+ "md5sum": "dbfa8c7d780a8d2632e08cc797a752aa"
349
+ },
350
+ {
351
+ "dataPath": "params_shard_18.bin",
352
+ "format": "raw-shard",
353
+ "nbytes": 33554432,
354
+ "records": [
355
+ {
356
+ "name": "model.layers.4.self_attn.o_proj.weight",
357
+ "shape": [
358
+ 4096,
359
+ 4096
360
+ ],
361
+ "dtype": "float16",
362
+ "format": "f32-to-bf16",
363
+ "nbytes": 33554432,
364
+ "byteOffset": 0
365
+ }
366
+ ],
367
+ "md5sum": "2484304417644cdb2e2d109654a1f736"
368
+ },
369
+ {
370
+ "dataPath": "params_shard_19.bin",
371
+ "format": "raw-shard",
372
+ "nbytes": 180355072,
373
+ "records": [
374
+ {
375
+ "name": "model.layers.4.mlp.gate_up_proj.weight",
376
+ "shape": [
377
+ 22016,
378
+ 4096
379
+ ],
380
+ "dtype": "float16",
381
+ "format": "f32-to-bf16",
382
+ "nbytes": 180355072,
383
+ "byteOffset": 0
384
+ }
385
+ ],
386
+ "md5sum": "015be42b6f9599103a6851f5422cebbd"
387
+ },
388
+ {
389
+ "dataPath": "params_shard_20.bin",
390
+ "format": "raw-shard",
391
+ "nbytes": 90177536,
392
+ "records": [
393
+ {
394
+ "name": "model.layers.4.mlp.down_proj.weight",
395
+ "shape": [
396
+ 4096,
397
+ 11008
398
+ ],
399
+ "dtype": "float16",
400
+ "format": "f32-to-bf16",
401
+ "nbytes": 90177536,
402
+ "byteOffset": 0
403
+ }
404
+ ],
405
+ "md5sum": "13392f17a75ff7d6aa15755f3485efc7"
406
+ },
407
+ {
408
+ "dataPath": "params_shard_21.bin",
409
+ "format": "raw-shard",
410
+ "nbytes": 100663296,
411
+ "records": [
412
+ {
413
+ "name": "model.layers.5.self_attn.qkv_proj.weight",
414
+ "shape": [
415
+ 12288,
416
+ 4096
417
+ ],
418
+ "dtype": "float16",
419
+ "format": "f32-to-bf16",
420
+ "nbytes": 100663296,
421
+ "byteOffset": 0
422
+ }
423
+ ],
424
+ "md5sum": "88d89ea3b05485eb29a84866b45bc4c4"
425
+ },
426
+ {
427
+ "dataPath": "params_shard_22.bin",
428
+ "format": "raw-shard",
429
+ "nbytes": 33554432,
430
+ "records": [
431
+ {
432
+ "name": "model.layers.5.self_attn.o_proj.weight",
433
+ "shape": [
434
+ 4096,
435
+ 4096
436
+ ],
437
+ "dtype": "float16",
438
+ "format": "f32-to-bf16",
439
+ "nbytes": 33554432,
440
+ "byteOffset": 0
441
+ }
442
+ ],
443
+ "md5sum": "7a4fc355a3c058bd876b3e001ea67bec"
444
+ },
445
+ {
446
+ "dataPath": "params_shard_23.bin",
447
+ "format": "raw-shard",
448
+ "nbytes": 180355072,
449
+ "records": [
450
+ {
451
+ "name": "model.layers.5.mlp.gate_up_proj.weight",
452
+ "shape": [
453
+ 22016,
454
+ 4096
455
+ ],
456
+ "dtype": "float16",
457
+ "format": "f32-to-bf16",
458
+ "nbytes": 180355072,
459
+ "byteOffset": 0
460
+ }
461
+ ],
462
+ "md5sum": "b4850fd6ab6c472e174fa3285188138a"
463
+ },
464
+ {
465
+ "dataPath": "params_shard_24.bin",
466
+ "format": "raw-shard",
467
+ "nbytes": 90177536,
468
+ "records": [
469
+ {
470
+ "name": "model.layers.5.mlp.down_proj.weight",
471
+ "shape": [
472
+ 4096,
473
+ 11008
474
+ ],
475
+ "dtype": "float16",
476
+ "format": "f32-to-bf16",
477
+ "nbytes": 90177536,
478
+ "byteOffset": 0
479
+ }
480
+ ],
481
+ "md5sum": "c941087506333ff04625f661e89e9cd0"
482
+ },
483
+ {
484
+ "dataPath": "params_shard_25.bin",
485
+ "format": "raw-shard",
486
+ "nbytes": 262144000,
487
+ "records": [
488
+ {
489
+ "name": "lm_head.weight",
490
+ "shape": [
491
+ 32000,
492
+ 4096
493
+ ],
494
+ "dtype": "float16",
495
+ "format": "f32-to-bf16",
496
+ "nbytes": 262144000,
497
+ "byteOffset": 0
498
+ }
499
+ ],
500
+ "md5sum": "a314468d893019244bd0da57381b7e91"
501
+ },
502
+ {
503
+ "dataPath": "params_shard_26.bin",
504
+ "format": "raw-shard",
505
+ "nbytes": 106496,
506
+ "records": [
507
+ {
508
+ "name": "model.layers.0.input_layernorm.weight",
509
+ "shape": [
510
+ 4096
511
+ ],
512
+ "dtype": "float16",
513
+ "format": "f32-to-bf16",
514
+ "nbytes": 8192,
515
+ "byteOffset": 0
516
+ },
517
+ {
518
+ "name": "model.layers.0.post_attention_layernorm.weight",
519
+ "shape": [
520
+ 4096
521
+ ],
522
+ "dtype": "float16",
523
+ "format": "f32-to-bf16",
524
+ "nbytes": 8192,
525
+ "byteOffset": 8192
526
+ },
527
+ {
528
+ "name": "model.layers.1.input_layernorm.weight",
529
+ "shape": [
530
+ 4096
531
+ ],
532
+ "dtype": "float16",
533
+ "format": "f32-to-bf16",
534
+ "nbytes": 8192,
535
+ "byteOffset": 16384
536
+ },
537
+ {
538
+ "name": "model.layers.1.post_attention_layernorm.weight",
539
+ "shape": [
540
+ 4096
541
+ ],
542
+ "dtype": "float16",
543
+ "format": "f32-to-bf16",
544
+ "nbytes": 8192,
545
+ "byteOffset": 24576
546
+ },
547
+ {
548
+ "name": "model.layers.2.input_layernorm.weight",
549
+ "shape": [
550
+ 4096
551
+ ],
552
+ "dtype": "float16",
553
+ "format": "f32-to-bf16",
554
+ "nbytes": 8192,
555
+ "byteOffset": 32768
556
+ },
557
+ {
558
+ "name": "model.layers.2.post_attention_layernorm.weight",
559
+ "shape": [
560
+ 4096
561
+ ],
562
+ "dtype": "float16",
563
+ "format": "f32-to-bf16",
564
+ "nbytes": 8192,
565
+ "byteOffset": 40960
566
+ },
567
+ {
568
+ "name": "model.layers.3.input_layernorm.weight",
569
+ "shape": [
570
+ 4096
571
+ ],
572
+ "dtype": "float16",
573
+ "format": "f32-to-bf16",
574
+ "nbytes": 8192,
575
+ "byteOffset": 49152
576
+ },
577
+ {
578
+ "name": "model.layers.3.post_attention_layernorm.weight",
579
+ "shape": [
580
+ 4096
581
+ ],
582
+ "dtype": "float16",
583
+ "format": "f32-to-bf16",
584
+ "nbytes": 8192,
585
+ "byteOffset": 57344
586
+ },
587
+ {
588
+ "name": "model.layers.4.input_layernorm.weight",
589
+ "shape": [
590
+ 4096
591
+ ],
592
+ "dtype": "float16",
593
+ "format": "f32-to-bf16",
594
+ "nbytes": 8192,
595
+ "byteOffset": 65536
596
+ },
597
+ {
598
+ "name": "model.layers.4.post_attention_layernorm.weight",
599
+ "shape": [
600
+ 4096
601
+ ],
602
+ "dtype": "float16",
603
+ "format": "f32-to-bf16",
604
+ "nbytes": 8192,
605
+ "byteOffset": 73728
606
+ },
607
+ {
608
+ "name": "model.layers.5.input_layernorm.weight",
609
+ "shape": [
610
+ 4096
611
+ ],
612
+ "dtype": "float16",
613
+ "format": "f32-to-bf16",
614
+ "nbytes": 8192,
615
+ "byteOffset": 81920
616
+ },
617
+ {
618
+ "name": "model.layers.5.post_attention_layernorm.weight",
619
+ "shape": [
620
+ 4096
621
+ ],
622
+ "dtype": "float16",
623
+ "format": "f32-to-bf16",
624
+ "nbytes": 8192,
625
+ "byteOffset": 90112
626
+ },
627
+ {
628
+ "name": "model.norm.weight",
629
+ "shape": [
630
+ 4096
631
+ ],
632
+ "dtype": "float16",
633
+ "format": "f32-to-bf16",
634
+ "nbytes": 8192,
635
+ "byteOffset": 98304
636
+ }
637
+ ],
638
+ "md5sum": "029997b0662e92accb22f44c8a832172"
639
+ }
640
+ ]
641
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58a6eb479b8d44d579d03f8d5ba8f2780993e2533b8a7f99df9e1a93a724e7e5
3
+ size 262144000
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d89a33c2a1eb42303cd6a038c67a4b45829e54b83d43a488619852c3d935d445
3
+ size 100663296
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:421af89980c29e5f65ed4261f2873933239b3b375724337b88cb1092d4f6334d
3
+ size 33554432
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb8d52f2561e7fba0a23847a28a1642ae81b9365112dfa9f966cc065aace1b50
3
+ size 180355072
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69c3fd4a25370551bfb101e2b1635a36e97ee8bbf90a0817661a683b2e789f80
3
+ size 90177536
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b34c32a536518facedc4e51f783b3c7639b7c59f4c38af24dee097a7e09a3e1b
3
+ size 100663296
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5af4b95b9aee2b52a3e75d5940b44eecff23a91601e96d398f20f14fe94a4500
3
+ size 33554432
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59b40055d226d8ca6e7e596524fefc1a4c913225f83c18f6fe2714cd3f3c915
3
+ size 180355072
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c794924707c847967e2b31590378a3334973e97a67e24cdcb57c3b1d126ce2af
3
+ size 90177536
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24886ddc72397bd7d042aef4d06852262cb13f3b195cfb55a578e0dd983497aa
3
+ size 100663296
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dba0fe8823fac205a526457e7b2fb84a2500ab7887dfc131af4faa823e0c387
3
+ size 33554432
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e782ca1244691e1611f2aabe6854c563758b2581328ac1882528c95b8037d2c
3
+ size 180355072
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:72f86365623fed05a01efe1c537d2c94fe67fcbb030aee9436ffa4201ad2449d
3
+ size 33554432
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6877c0e4e003b7b02e647ca8c52aab372e08bccf59b9432fdc3e3386d4af5fc4
3
+ size 90177536
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e5047683c4f65e3b84a6f040b24149120f4c23185ce261c89f62ffb8ce8e35d
3
+ size 100663296
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:362cc73170031b7c2a5f021e081c402c2fc81eec788ef4da645da54c812aa37d
3
+ size 33554432
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c21c64ebb1a22e771c2b1216f5c7a89573568e8368291570f941607bffed10f
3
+ size 180355072
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ace74212d7f0c6130c14c2a38fedf245944ea010d32c9ca6b7424b765fc36e06
3
+ size 90177536
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c37d5b5330d75b7a46b46f9bd207d8287968abd80bbb525a4ec962a8991e844
3
+ size 262144000
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28e738ce8651dbbcd8eee9b4d7228a4457818b3638a81bee83ec21f9d58d0378
3
+ size 106496
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efb47856082bbd8d34e2ff4f1e464517279b86f04b2164d2365f133c2b3e6e9a
3
+ size 180355072
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0e4c2fdaaf5177ed0cf31f5944a193b7af5201c20860e2ae82a8db848174e88
3
+ size 90177536
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:558977cd52f78dfb2fce9f72d3f8a41086bb765332330e6ff98cd73958202f0f
3
+ size 100663296
params_shard_6.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a9456313492137079f20f660b1c87a719febb75c4cc7c79029f61aab0a45fe5
3
+ size 33554432
params_shard_7.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2bd802da2e1508f82ad0765deb071f7cf96773ba8ef8f61a7df28f1d082a99a
3
+ size 180355072
params_shard_8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cecd5c0c6bc2a7cb1e9d156f01279062c0c3f377679982051836178981256d22
3
+ size 90177536
params_shard_9.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e1011ada75bcde402cbbd41c0551ec5097c8b9529caeadd2ebb53c404328076
3
+ size 100663296
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<s>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "clean_up_tokenization_spaces": false,
13
+ "eos_token": {
14
+ "__type": "AddedToken",
15
+ "content": "</s>",
16
+ "lstrip": false,
17
+ "normalized": true,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "legacy": true,
22
+ "model_max_length": 2048,
23
+ "pad_token": null,
24
+ "padding_side": "right",
25
+ "sp_model_kwargs": {},
26
+ "tokenizer_class": "LlamaTokenizer",
27
+ "unk_token": {
28
+ "__type": "AddedToken",
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ }
35
+ }