N8Programs committed on
Commit
301e5a9
·
verified ·
1 Parent(s): 3103fc2

Upload 5 files

Browse files
mlc-chat-config.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "0.1.0",
3
+ "model_type": "llama",
4
+ "quantization": "q4f16_1",
5
+ "model_config": {
6
+ "hidden_size": 384,
7
+ "intermediate_size": 768,
8
+ "num_attention_heads": 8,
9
+ "num_hidden_layers": 6,
10
+ "rms_norm_eps": 1e-06,
11
+ "vocab_size": 4096,
12
+ "tie_word_embeddings": true,
13
+ "position_embedding_base": 10000,
14
+ "rope_scaling": null,
15
+ "context_window_size": 2048,
16
+ "prefill_chunk_size": 2048,
17
+ "num_key_value_heads": 4,
18
+ "head_dim": 128,
19
+ "tensor_parallel_shards": 1,
20
+ "pipeline_parallel_stages": 1,
21
+ "max_batch_size": 128
22
+ },
23
+ "vocab_size": 4096,
24
+ "context_window_size": 2048,
25
+ "sliding_window_size": -1,
26
+ "prefill_chunk_size": 2048,
27
+ "attention_sink_size": -1,
28
+ "tensor_parallel_shards": 1,
29
+ "pipeline_parallel_stages": 1,
30
+ "temperature": 1.0,
31
+ "presence_penalty": 0.0,
32
+ "frequency_penalty": 0.0,
33
+ "repetition_penalty": 1.0,
34
+ "top_p": 1.0,
35
+ "tokenizer_files": [
36
+ "tokenizer.json",
37
+ "tokenizer_config.json"
38
+ ],
39
+ "tokenizer_info": {
40
+ "token_postproc_method": "byte_level",
41
+ "prepend_space_in_encode": false,
42
+ "strip_space_in_decode": false
43
+ },
44
+ "conv_template": {
45
+ "name": "LM",
46
+ "system_template": "{system_message}",
47
+ "system_message": "",
48
+ "system_prefix_token_ids": [
49
+ 1
50
+ ],
51
+ "add_role_after_system_message": true,
52
+ "roles": {
53
+ "user": "",
54
+ "assistant": ""
55
+ },
56
+ "role_templates": {
57
+ "user": "{user_message}",
58
+ "assistant": "{assistant_message}",
59
+ "tool": "{tool_message}"
60
+ },
61
+ "messages": [],
62
+ "seps": [
63
+ ""
64
+ ],
65
+ "role_content_sep": "",
66
+ "role_empty_sep": "",
67
+ "stop_str": [],
68
+ "stop_token_ids": [
69
+ 2
70
+ ],
71
+ "function_string": "",
72
+ "use_function_calling": false
73
+ },
74
+ "pad_token_id": 0,
75
+ "bos_token_id": 128000,
76
+ "eos_token_id": 128001
77
+ }
ndarray-cache.json ADDED
@@ -0,0 +1,697 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 63,
4
+ "ParamBytes": 7862016.0,
5
+ "BitsPerParam": 4.504111095833906
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 7862016,
12
+ "records": [
13
+ {
14
+ "name": "model.embed_tokens.q_weight",
15
+ "shape": [
16
+ 4096,
17
+ 48
18
+ ],
19
+ "dtype": "uint32",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 786432,
22
+ "byteOffset": 0
23
+ },
24
+ {
25
+ "name": "model.embed_tokens.q_scale",
26
+ "shape": [
27
+ 4096,
28
+ 12
29
+ ],
30
+ "dtype": "float16",
31
+ "format": "f32-to-bf16",
32
+ "nbytes": 98304,
33
+ "byteOffset": 786432
34
+ },
35
+ {
36
+ "name": "model.layers.0.input_layernorm.weight",
37
+ "shape": [
38
+ 384
39
+ ],
40
+ "dtype": "float16",
41
+ "format": "f32-to-bf16",
42
+ "nbytes": 768,
43
+ "byteOffset": 884736
44
+ },
45
+ {
46
+ "name": "model.layers.0.mlp.down_proj.q_weight",
47
+ "shape": [
48
+ 384,
49
+ 96
50
+ ],
51
+ "dtype": "uint32",
52
+ "format": "f32-to-bf16",
53
+ "nbytes": 147456,
54
+ "byteOffset": 885504
55
+ },
56
+ {
57
+ "name": "model.layers.0.mlp.down_proj.q_scale",
58
+ "shape": [
59
+ 384,
60
+ 24
61
+ ],
62
+ "dtype": "float16",
63
+ "format": "f32-to-bf16",
64
+ "nbytes": 18432,
65
+ "byteOffset": 1032960
66
+ },
67
+ {
68
+ "name": "model.layers.0.mlp.gate_up_proj.q_weight",
69
+ "shape": [
70
+ 1536,
71
+ 48
72
+ ],
73
+ "dtype": "uint32",
74
+ "format": "f32-to-bf16",
75
+ "nbytes": 294912,
76
+ "byteOffset": 1051392
77
+ },
78
+ {
79
+ "name": "model.layers.0.mlp.gate_up_proj.q_scale",
80
+ "shape": [
81
+ 1536,
82
+ 12
83
+ ],
84
+ "dtype": "float16",
85
+ "format": "f32-to-bf16",
86
+ "nbytes": 36864,
87
+ "byteOffset": 1346304
88
+ },
89
+ {
90
+ "name": "model.layers.0.post_attention_layernorm.weight",
91
+ "shape": [
92
+ 384
93
+ ],
94
+ "dtype": "float16",
95
+ "format": "f32-to-bf16",
96
+ "nbytes": 768,
97
+ "byteOffset": 1383168
98
+ },
99
+ {
100
+ "name": "model.layers.0.self_attn.qkv_proj.q_weight",
101
+ "shape": [
102
+ 2048,
103
+ 48
104
+ ],
105
+ "dtype": "uint32",
106
+ "format": "f32-to-bf16",
107
+ "nbytes": 393216,
108
+ "byteOffset": 1383936
109
+ },
110
+ {
111
+ "name": "model.layers.0.self_attn.qkv_proj.q_scale",
112
+ "shape": [
113
+ 2048,
114
+ 12
115
+ ],
116
+ "dtype": "float16",
117
+ "format": "f32-to-bf16",
118
+ "nbytes": 49152,
119
+ "byteOffset": 1777152
120
+ },
121
+ {
122
+ "name": "model.layers.0.self_attn.o_proj.q_weight",
123
+ "shape": [
124
+ 384,
125
+ 128
126
+ ],
127
+ "dtype": "uint32",
128
+ "format": "f32-to-bf16",
129
+ "nbytes": 196608,
130
+ "byteOffset": 1826304
131
+ },
132
+ {
133
+ "name": "model.layers.0.self_attn.o_proj.q_scale",
134
+ "shape": [
135
+ 384,
136
+ 32
137
+ ],
138
+ "dtype": "float16",
139
+ "format": "f32-to-bf16",
140
+ "nbytes": 24576,
141
+ "byteOffset": 2022912
142
+ },
143
+ {
144
+ "name": "model.layers.1.input_layernorm.weight",
145
+ "shape": [
146
+ 384
147
+ ],
148
+ "dtype": "float16",
149
+ "format": "f32-to-bf16",
150
+ "nbytes": 768,
151
+ "byteOffset": 2047488
152
+ },
153
+ {
154
+ "name": "model.layers.1.mlp.down_proj.q_weight",
155
+ "shape": [
156
+ 384,
157
+ 96
158
+ ],
159
+ "dtype": "uint32",
160
+ "format": "f32-to-bf16",
161
+ "nbytes": 147456,
162
+ "byteOffset": 2048256
163
+ },
164
+ {
165
+ "name": "model.layers.1.mlp.down_proj.q_scale",
166
+ "shape": [
167
+ 384,
168
+ 24
169
+ ],
170
+ "dtype": "float16",
171
+ "format": "f32-to-bf16",
172
+ "nbytes": 18432,
173
+ "byteOffset": 2195712
174
+ },
175
+ {
176
+ "name": "model.layers.1.mlp.gate_up_proj.q_weight",
177
+ "shape": [
178
+ 1536,
179
+ 48
180
+ ],
181
+ "dtype": "uint32",
182
+ "format": "f32-to-bf16",
183
+ "nbytes": 294912,
184
+ "byteOffset": 2214144
185
+ },
186
+ {
187
+ "name": "model.layers.1.mlp.gate_up_proj.q_scale",
188
+ "shape": [
189
+ 1536,
190
+ 12
191
+ ],
192
+ "dtype": "float16",
193
+ "format": "f32-to-bf16",
194
+ "nbytes": 36864,
195
+ "byteOffset": 2509056
196
+ },
197
+ {
198
+ "name": "model.layers.1.post_attention_layernorm.weight",
199
+ "shape": [
200
+ 384
201
+ ],
202
+ "dtype": "float16",
203
+ "format": "f32-to-bf16",
204
+ "nbytes": 768,
205
+ "byteOffset": 2545920
206
+ },
207
+ {
208
+ "name": "model.layers.1.self_attn.qkv_proj.q_weight",
209
+ "shape": [
210
+ 2048,
211
+ 48
212
+ ],
213
+ "dtype": "uint32",
214
+ "format": "f32-to-bf16",
215
+ "nbytes": 393216,
216
+ "byteOffset": 2546688
217
+ },
218
+ {
219
+ "name": "model.layers.1.self_attn.qkv_proj.q_scale",
220
+ "shape": [
221
+ 2048,
222
+ 12
223
+ ],
224
+ "dtype": "float16",
225
+ "format": "f32-to-bf16",
226
+ "nbytes": 49152,
227
+ "byteOffset": 2939904
228
+ },
229
+ {
230
+ "name": "model.layers.1.self_attn.o_proj.q_weight",
231
+ "shape": [
232
+ 384,
233
+ 128
234
+ ],
235
+ "dtype": "uint32",
236
+ "format": "f32-to-bf16",
237
+ "nbytes": 196608,
238
+ "byteOffset": 2989056
239
+ },
240
+ {
241
+ "name": "model.layers.1.self_attn.o_proj.q_scale",
242
+ "shape": [
243
+ 384,
244
+ 32
245
+ ],
246
+ "dtype": "float16",
247
+ "format": "f32-to-bf16",
248
+ "nbytes": 24576,
249
+ "byteOffset": 3185664
250
+ },
251
+ {
252
+ "name": "model.layers.2.input_layernorm.weight",
253
+ "shape": [
254
+ 384
255
+ ],
256
+ "dtype": "float16",
257
+ "format": "f32-to-bf16",
258
+ "nbytes": 768,
259
+ "byteOffset": 3210240
260
+ },
261
+ {
262
+ "name": "model.layers.2.mlp.down_proj.q_weight",
263
+ "shape": [
264
+ 384,
265
+ 96
266
+ ],
267
+ "dtype": "uint32",
268
+ "format": "f32-to-bf16",
269
+ "nbytes": 147456,
270
+ "byteOffset": 3211008
271
+ },
272
+ {
273
+ "name": "model.layers.2.mlp.down_proj.q_scale",
274
+ "shape": [
275
+ 384,
276
+ 24
277
+ ],
278
+ "dtype": "float16",
279
+ "format": "f32-to-bf16",
280
+ "nbytes": 18432,
281
+ "byteOffset": 3358464
282
+ },
283
+ {
284
+ "name": "model.layers.2.mlp.gate_up_proj.q_weight",
285
+ "shape": [
286
+ 1536,
287
+ 48
288
+ ],
289
+ "dtype": "uint32",
290
+ "format": "f32-to-bf16",
291
+ "nbytes": 294912,
292
+ "byteOffset": 3376896
293
+ },
294
+ {
295
+ "name": "model.layers.2.mlp.gate_up_proj.q_scale",
296
+ "shape": [
297
+ 1536,
298
+ 12
299
+ ],
300
+ "dtype": "float16",
301
+ "format": "f32-to-bf16",
302
+ "nbytes": 36864,
303
+ "byteOffset": 3671808
304
+ },
305
+ {
306
+ "name": "model.layers.2.post_attention_layernorm.weight",
307
+ "shape": [
308
+ 384
309
+ ],
310
+ "dtype": "float16",
311
+ "format": "f32-to-bf16",
312
+ "nbytes": 768,
313
+ "byteOffset": 3708672
314
+ },
315
+ {
316
+ "name": "model.layers.2.self_attn.qkv_proj.q_weight",
317
+ "shape": [
318
+ 2048,
319
+ 48
320
+ ],
321
+ "dtype": "uint32",
322
+ "format": "f32-to-bf16",
323
+ "nbytes": 393216,
324
+ "byteOffset": 3709440
325
+ },
326
+ {
327
+ "name": "model.layers.2.self_attn.qkv_proj.q_scale",
328
+ "shape": [
329
+ 2048,
330
+ 12
331
+ ],
332
+ "dtype": "float16",
333
+ "format": "f32-to-bf16",
334
+ "nbytes": 49152,
335
+ "byteOffset": 4102656
336
+ },
337
+ {
338
+ "name": "model.layers.2.self_attn.o_proj.q_weight",
339
+ "shape": [
340
+ 384,
341
+ 128
342
+ ],
343
+ "dtype": "uint32",
344
+ "format": "f32-to-bf16",
345
+ "nbytes": 196608,
346
+ "byteOffset": 4151808
347
+ },
348
+ {
349
+ "name": "model.layers.2.self_attn.o_proj.q_scale",
350
+ "shape": [
351
+ 384,
352
+ 32
353
+ ],
354
+ "dtype": "float16",
355
+ "format": "f32-to-bf16",
356
+ "nbytes": 24576,
357
+ "byteOffset": 4348416
358
+ },
359
+ {
360
+ "name": "model.layers.3.input_layernorm.weight",
361
+ "shape": [
362
+ 384
363
+ ],
364
+ "dtype": "float16",
365
+ "format": "f32-to-bf16",
366
+ "nbytes": 768,
367
+ "byteOffset": 4372992
368
+ },
369
+ {
370
+ "name": "model.layers.3.mlp.down_proj.q_weight",
371
+ "shape": [
372
+ 384,
373
+ 96
374
+ ],
375
+ "dtype": "uint32",
376
+ "format": "f32-to-bf16",
377
+ "nbytes": 147456,
378
+ "byteOffset": 4373760
379
+ },
380
+ {
381
+ "name": "model.layers.3.mlp.down_proj.q_scale",
382
+ "shape": [
383
+ 384,
384
+ 24
385
+ ],
386
+ "dtype": "float16",
387
+ "format": "f32-to-bf16",
388
+ "nbytes": 18432,
389
+ "byteOffset": 4521216
390
+ },
391
+ {
392
+ "name": "model.layers.3.mlp.gate_up_proj.q_weight",
393
+ "shape": [
394
+ 1536,
395
+ 48
396
+ ],
397
+ "dtype": "uint32",
398
+ "format": "f32-to-bf16",
399
+ "nbytes": 294912,
400
+ "byteOffset": 4539648
401
+ },
402
+ {
403
+ "name": "model.layers.3.mlp.gate_up_proj.q_scale",
404
+ "shape": [
405
+ 1536,
406
+ 12
407
+ ],
408
+ "dtype": "float16",
409
+ "format": "f32-to-bf16",
410
+ "nbytes": 36864,
411
+ "byteOffset": 4834560
412
+ },
413
+ {
414
+ "name": "model.layers.3.post_attention_layernorm.weight",
415
+ "shape": [
416
+ 384
417
+ ],
418
+ "dtype": "float16",
419
+ "format": "f32-to-bf16",
420
+ "nbytes": 768,
421
+ "byteOffset": 4871424
422
+ },
423
+ {
424
+ "name": "model.layers.3.self_attn.qkv_proj.q_weight",
425
+ "shape": [
426
+ 2048,
427
+ 48
428
+ ],
429
+ "dtype": "uint32",
430
+ "format": "f32-to-bf16",
431
+ "nbytes": 393216,
432
+ "byteOffset": 4872192
433
+ },
434
+ {
435
+ "name": "model.layers.3.self_attn.qkv_proj.q_scale",
436
+ "shape": [
437
+ 2048,
438
+ 12
439
+ ],
440
+ "dtype": "float16",
441
+ "format": "f32-to-bf16",
442
+ "nbytes": 49152,
443
+ "byteOffset": 5265408
444
+ },
445
+ {
446
+ "name": "model.layers.3.self_attn.o_proj.q_weight",
447
+ "shape": [
448
+ 384,
449
+ 128
450
+ ],
451
+ "dtype": "uint32",
452
+ "format": "f32-to-bf16",
453
+ "nbytes": 196608,
454
+ "byteOffset": 5314560
455
+ },
456
+ {
457
+ "name": "model.layers.3.self_attn.o_proj.q_scale",
458
+ "shape": [
459
+ 384,
460
+ 32
461
+ ],
462
+ "dtype": "float16",
463
+ "format": "f32-to-bf16",
464
+ "nbytes": 24576,
465
+ "byteOffset": 5511168
466
+ },
467
+ {
468
+ "name": "model.layers.4.input_layernorm.weight",
469
+ "shape": [
470
+ 384
471
+ ],
472
+ "dtype": "float16",
473
+ "format": "f32-to-bf16",
474
+ "nbytes": 768,
475
+ "byteOffset": 5535744
476
+ },
477
+ {
478
+ "name": "model.layers.4.mlp.down_proj.q_weight",
479
+ "shape": [
480
+ 384,
481
+ 96
482
+ ],
483
+ "dtype": "uint32",
484
+ "format": "f32-to-bf16",
485
+ "nbytes": 147456,
486
+ "byteOffset": 5536512
487
+ },
488
+ {
489
+ "name": "model.layers.4.mlp.down_proj.q_scale",
490
+ "shape": [
491
+ 384,
492
+ 24
493
+ ],
494
+ "dtype": "float16",
495
+ "format": "f32-to-bf16",
496
+ "nbytes": 18432,
497
+ "byteOffset": 5683968
498
+ },
499
+ {
500
+ "name": "model.layers.4.mlp.gate_up_proj.q_weight",
501
+ "shape": [
502
+ 1536,
503
+ 48
504
+ ],
505
+ "dtype": "uint32",
506
+ "format": "f32-to-bf16",
507
+ "nbytes": 294912,
508
+ "byteOffset": 5702400
509
+ },
510
+ {
511
+ "name": "model.layers.4.mlp.gate_up_proj.q_scale",
512
+ "shape": [
513
+ 1536,
514
+ 12
515
+ ],
516
+ "dtype": "float16",
517
+ "format": "f32-to-bf16",
518
+ "nbytes": 36864,
519
+ "byteOffset": 5997312
520
+ },
521
+ {
522
+ "name": "model.layers.4.post_attention_layernorm.weight",
523
+ "shape": [
524
+ 384
525
+ ],
526
+ "dtype": "float16",
527
+ "format": "f32-to-bf16",
528
+ "nbytes": 768,
529
+ "byteOffset": 6034176
530
+ },
531
+ {
532
+ "name": "model.layers.4.self_attn.qkv_proj.q_weight",
533
+ "shape": [
534
+ 2048,
535
+ 48
536
+ ],
537
+ "dtype": "uint32",
538
+ "format": "f32-to-bf16",
539
+ "nbytes": 393216,
540
+ "byteOffset": 6034944
541
+ },
542
+ {
543
+ "name": "model.layers.4.self_attn.qkv_proj.q_scale",
544
+ "shape": [
545
+ 2048,
546
+ 12
547
+ ],
548
+ "dtype": "float16",
549
+ "format": "f32-to-bf16",
550
+ "nbytes": 49152,
551
+ "byteOffset": 6428160
552
+ },
553
+ {
554
+ "name": "model.layers.4.self_attn.o_proj.q_weight",
555
+ "shape": [
556
+ 384,
557
+ 128
558
+ ],
559
+ "dtype": "uint32",
560
+ "format": "f32-to-bf16",
561
+ "nbytes": 196608,
562
+ "byteOffset": 6477312
563
+ },
564
+ {
565
+ "name": "model.layers.4.self_attn.o_proj.q_scale",
566
+ "shape": [
567
+ 384,
568
+ 32
569
+ ],
570
+ "dtype": "float16",
571
+ "format": "f32-to-bf16",
572
+ "nbytes": 24576,
573
+ "byteOffset": 6673920
574
+ },
575
+ {
576
+ "name": "model.layers.5.input_layernorm.weight",
577
+ "shape": [
578
+ 384
579
+ ],
580
+ "dtype": "float16",
581
+ "format": "f32-to-bf16",
582
+ "nbytes": 768,
583
+ "byteOffset": 6698496
584
+ },
585
+ {
586
+ "name": "model.layers.5.mlp.down_proj.q_weight",
587
+ "shape": [
588
+ 384,
589
+ 96
590
+ ],
591
+ "dtype": "uint32",
592
+ "format": "f32-to-bf16",
593
+ "nbytes": 147456,
594
+ "byteOffset": 6699264
595
+ },
596
+ {
597
+ "name": "model.layers.5.mlp.down_proj.q_scale",
598
+ "shape": [
599
+ 384,
600
+ 24
601
+ ],
602
+ "dtype": "float16",
603
+ "format": "f32-to-bf16",
604
+ "nbytes": 18432,
605
+ "byteOffset": 6846720
606
+ },
607
+ {
608
+ "name": "model.layers.5.mlp.gate_up_proj.q_weight",
609
+ "shape": [
610
+ 1536,
611
+ 48
612
+ ],
613
+ "dtype": "uint32",
614
+ "format": "f32-to-bf16",
615
+ "nbytes": 294912,
616
+ "byteOffset": 6865152
617
+ },
618
+ {
619
+ "name": "model.layers.5.mlp.gate_up_proj.q_scale",
620
+ "shape": [
621
+ 1536,
622
+ 12
623
+ ],
624
+ "dtype": "float16",
625
+ "format": "f32-to-bf16",
626
+ "nbytes": 36864,
627
+ "byteOffset": 7160064
628
+ },
629
+ {
630
+ "name": "model.layers.5.post_attention_layernorm.weight",
631
+ "shape": [
632
+ 384
633
+ ],
634
+ "dtype": "float16",
635
+ "format": "f32-to-bf16",
636
+ "nbytes": 768,
637
+ "byteOffset": 7196928
638
+ },
639
+ {
640
+ "name": "model.layers.5.self_attn.qkv_proj.q_weight",
641
+ "shape": [
642
+ 2048,
643
+ 48
644
+ ],
645
+ "dtype": "uint32",
646
+ "format": "f32-to-bf16",
647
+ "nbytes": 393216,
648
+ "byteOffset": 7197696
649
+ },
650
+ {
651
+ "name": "model.layers.5.self_attn.qkv_proj.q_scale",
652
+ "shape": [
653
+ 2048,
654
+ 12
655
+ ],
656
+ "dtype": "float16",
657
+ "format": "f32-to-bf16",
658
+ "nbytes": 49152,
659
+ "byteOffset": 7590912
660
+ },
661
+ {
662
+ "name": "model.layers.5.self_attn.o_proj.q_weight",
663
+ "shape": [
664
+ 384,
665
+ 128
666
+ ],
667
+ "dtype": "uint32",
668
+ "format": "f32-to-bf16",
669
+ "nbytes": 196608,
670
+ "byteOffset": 7640064
671
+ },
672
+ {
673
+ "name": "model.layers.5.self_attn.o_proj.q_scale",
674
+ "shape": [
675
+ 384,
676
+ 32
677
+ ],
678
+ "dtype": "float16",
679
+ "format": "f32-to-bf16",
680
+ "nbytes": 24576,
681
+ "byteOffset": 7836672
682
+ },
683
+ {
684
+ "name": "model.norm.weight",
685
+ "shape": [
686
+ 384
687
+ ],
688
+ "dtype": "float16",
689
+ "format": "f32-to-bf16",
690
+ "nbytes": 768,
691
+ "byteOffset": 7861248
692
+ }
693
+ ],
694
+ "md5sum": "c4a7c141bffb4bf77b662a6b32138dc1"
695
+ }
696
+ ]
697
+ }
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:846360408eba074c7e9945334aa88afd0684bf7a7e4b47e6fd4b097cd2b14651
3
+ size 7862016
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<|pad|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<|bos|>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "<|eos|>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "bos_token": "<|bos|>",
29
+ "clean_up_tokenization_spaces": false,
30
+ "eos_token": "<|eos|>",
31
+ "model_max_length": 1000000000000000019884624838656,
32
+ "pad_token": "<|pad|>",
33
+ "tokenizer_class": "PreTrainedTokenizerFast"
34
+ }