Saurav Panda
committed on
Commit
·
0ffcd1d
1
Parent(s):
19020e6
Fixing model loading issue
Browse files
- ndarray-cache.json +483 -483
- params_shard_0.bin +1 -1
- params_shard_1.bin +1 -1
- params_shard_2.bin +1 -1
ndarray-cache.json
CHANGED
@@ -45,8 +45,8 @@
|
|
45 |
{
|
46 |
"name": "model.layers.0.mlp.down_proj.q_weight",
|
47 |
"shape": [
|
48 |
-
|
49 |
-
|
50 |
],
|
51 |
"dtype": "uint32",
|
52 |
"format": "f32-to-bf16",
|
@@ -56,8 +56,8 @@
|
|
56 |
{
|
57 |
"name": "model.layers.0.mlp.down_proj.q_scale",
|
58 |
"shape": [
|
59 |
-
|
60 |
-
|
61 |
],
|
62 |
"dtype": "float16",
|
63 |
"format": "f32-to-bf16",
|
@@ -67,8 +67,8 @@
|
|
67 |
{
|
68 |
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
|
69 |
"shape": [
|
70 |
-
|
71 |
-
|
72 |
],
|
73 |
"dtype": "uint32",
|
74 |
"format": "f32-to-bf16",
|
@@ -78,8 +78,8 @@
|
|
78 |
{
|
79 |
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
|
80 |
"shape": [
|
81 |
-
|
82 |
-
|
83 |
],
|
84 |
"dtype": "float16",
|
85 |
"format": "f32-to-bf16",
|
@@ -99,8 +99,8 @@
|
|
99 |
{
|
100 |
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
|
101 |
"shape": [
|
102 |
-
|
103 |
-
|
104 |
],
|
105 |
"dtype": "uint32",
|
106 |
"format": "f32-to-bf16",
|
@@ -110,8 +110,8 @@
|
|
110 |
{
|
111 |
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
|
112 |
"shape": [
|
113 |
-
|
114 |
-
|
115 |
],
|
116 |
"dtype": "float16",
|
117 |
"format": "f32-to-bf16",
|
@@ -121,8 +121,8 @@
|
|
121 |
{
|
122 |
"name": "model.layers.0.self_attn.o_proj.q_weight",
|
123 |
"shape": [
|
124 |
-
|
125 |
-
|
126 |
],
|
127 |
"dtype": "uint32",
|
128 |
"format": "f32-to-bf16",
|
@@ -132,8 +132,8 @@
|
|
132 |
{
|
133 |
"name": "model.layers.0.self_attn.o_proj.q_scale",
|
134 |
"shape": [
|
135 |
-
|
136 |
-
|
137 |
],
|
138 |
"dtype": "float16",
|
139 |
"format": "f32-to-bf16",
|
@@ -153,8 +153,8 @@
|
|
153 |
{
|
154 |
"name": "model.layers.1.mlp.down_proj.q_weight",
|
155 |
"shape": [
|
156 |
-
|
157 |
-
|
158 |
],
|
159 |
"dtype": "uint32",
|
160 |
"format": "f32-to-bf16",
|
@@ -164,8 +164,8 @@
|
|
164 |
{
|
165 |
"name": "model.layers.1.mlp.down_proj.q_scale",
|
166 |
"shape": [
|
167 |
-
|
168 |
-
|
169 |
],
|
170 |
"dtype": "float16",
|
171 |
"format": "f32-to-bf16",
|
@@ -175,8 +175,8 @@
|
|
175 |
{
|
176 |
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
|
177 |
"shape": [
|
178 |
-
|
179 |
-
|
180 |
],
|
181 |
"dtype": "uint32",
|
182 |
"format": "f32-to-bf16",
|
@@ -186,8 +186,8 @@
|
|
186 |
{
|
187 |
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
|
188 |
"shape": [
|
189 |
-
|
190 |
-
|
191 |
],
|
192 |
"dtype": "float16",
|
193 |
"format": "f32-to-bf16",
|
@@ -207,8 +207,8 @@
|
|
207 |
{
|
208 |
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
|
209 |
"shape": [
|
210 |
-
|
211 |
-
|
212 |
],
|
213 |
"dtype": "uint32",
|
214 |
"format": "f32-to-bf16",
|
@@ -218,8 +218,8 @@
|
|
218 |
{
|
219 |
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
|
220 |
"shape": [
|
221 |
-
|
222 |
-
|
223 |
],
|
224 |
"dtype": "float16",
|
225 |
"format": "f32-to-bf16",
|
@@ -229,8 +229,8 @@
|
|
229 |
{
|
230 |
"name": "model.layers.1.self_attn.o_proj.q_weight",
|
231 |
"shape": [
|
232 |
-
|
233 |
-
|
234 |
],
|
235 |
"dtype": "uint32",
|
236 |
"format": "f32-to-bf16",
|
@@ -240,8 +240,8 @@
|
|
240 |
{
|
241 |
"name": "model.layers.1.self_attn.o_proj.q_scale",
|
242 |
"shape": [
|
243 |
-
|
244 |
-
|
245 |
],
|
246 |
"dtype": "float16",
|
247 |
"format": "f32-to-bf16",
|
@@ -261,8 +261,8 @@
|
|
261 |
{
|
262 |
"name": "model.layers.10.mlp.down_proj.q_weight",
|
263 |
"shape": [
|
264 |
-
|
265 |
-
|
266 |
],
|
267 |
"dtype": "uint32",
|
268 |
"format": "f32-to-bf16",
|
@@ -272,8 +272,8 @@
|
|
272 |
{
|
273 |
"name": "model.layers.10.mlp.down_proj.q_scale",
|
274 |
"shape": [
|
275 |
-
|
276 |
-
|
277 |
],
|
278 |
"dtype": "float16",
|
279 |
"format": "f32-to-bf16",
|
@@ -283,8 +283,8 @@
|
|
283 |
{
|
284 |
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
|
285 |
"shape": [
|
286 |
-
|
287 |
-
|
288 |
],
|
289 |
"dtype": "uint32",
|
290 |
"format": "f32-to-bf16",
|
@@ -294,8 +294,8 @@
|
|
294 |
{
|
295 |
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
|
296 |
"shape": [
|
297 |
-
|
298 |
-
|
299 |
],
|
300 |
"dtype": "float16",
|
301 |
"format": "f32-to-bf16",
|
@@ -315,8 +315,8 @@
|
|
315 |
{
|
316 |
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
|
317 |
"shape": [
|
318 |
-
|
319 |
-
|
320 |
],
|
321 |
"dtype": "uint32",
|
322 |
"format": "f32-to-bf16",
|
@@ -326,8 +326,8 @@
|
|
326 |
{
|
327 |
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
|
328 |
"shape": [
|
329 |
-
|
330 |
-
|
331 |
],
|
332 |
"dtype": "float16",
|
333 |
"format": "f32-to-bf16",
|
@@ -337,8 +337,8 @@
|
|
337 |
{
|
338 |
"name": "model.layers.10.self_attn.o_proj.q_weight",
|
339 |
"shape": [
|
340 |
-
|
341 |
-
|
342 |
],
|
343 |
"dtype": "uint32",
|
344 |
"format": "f32-to-bf16",
|
@@ -348,8 +348,8 @@
|
|
348 |
{
|
349 |
"name": "model.layers.10.self_attn.o_proj.q_scale",
|
350 |
"shape": [
|
351 |
-
|
352 |
-
|
353 |
],
|
354 |
"dtype": "float16",
|
355 |
"format": "f32-to-bf16",
|
@@ -369,8 +369,8 @@
|
|
369 |
{
|
370 |
"name": "model.layers.11.mlp.down_proj.q_weight",
|
371 |
"shape": [
|
372 |
-
|
373 |
-
|
374 |
],
|
375 |
"dtype": "uint32",
|
376 |
"format": "f32-to-bf16",
|
@@ -380,8 +380,8 @@
|
|
380 |
{
|
381 |
"name": "model.layers.11.mlp.down_proj.q_scale",
|
382 |
"shape": [
|
383 |
-
|
384 |
-
|
385 |
],
|
386 |
"dtype": "float16",
|
387 |
"format": "f32-to-bf16",
|
@@ -391,8 +391,8 @@
|
|
391 |
{
|
392 |
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
|
393 |
"shape": [
|
394 |
-
|
395 |
-
|
396 |
],
|
397 |
"dtype": "uint32",
|
398 |
"format": "f32-to-bf16",
|
@@ -402,8 +402,8 @@
|
|
402 |
{
|
403 |
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
|
404 |
"shape": [
|
405 |
-
|
406 |
-
|
407 |
],
|
408 |
"dtype": "float16",
|
409 |
"format": "f32-to-bf16",
|
@@ -423,8 +423,8 @@
|
|
423 |
{
|
424 |
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
|
425 |
"shape": [
|
426 |
-
|
427 |
-
|
428 |
],
|
429 |
"dtype": "uint32",
|
430 |
"format": "f32-to-bf16",
|
@@ -434,8 +434,8 @@
|
|
434 |
{
|
435 |
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
|
436 |
"shape": [
|
437 |
-
|
438 |
-
|
439 |
],
|
440 |
"dtype": "float16",
|
441 |
"format": "f32-to-bf16",
|
@@ -445,8 +445,8 @@
|
|
445 |
{
|
446 |
"name": "model.layers.11.self_attn.o_proj.q_weight",
|
447 |
"shape": [
|
448 |
-
|
449 |
-
|
450 |
],
|
451 |
"dtype": "uint32",
|
452 |
"format": "f32-to-bf16",
|
@@ -456,8 +456,8 @@
|
|
456 |
{
|
457 |
"name": "model.layers.11.self_attn.o_proj.q_scale",
|
458 |
"shape": [
|
459 |
-
|
460 |
-
|
461 |
],
|
462 |
"dtype": "float16",
|
463 |
"format": "f32-to-bf16",
|
@@ -477,8 +477,8 @@
|
|
477 |
{
|
478 |
"name": "model.layers.12.mlp.down_proj.q_weight",
|
479 |
"shape": [
|
480 |
-
|
481 |
-
|
482 |
],
|
483 |
"dtype": "uint32",
|
484 |
"format": "f32-to-bf16",
|
@@ -488,8 +488,8 @@
|
|
488 |
{
|
489 |
"name": "model.layers.12.mlp.down_proj.q_scale",
|
490 |
"shape": [
|
491 |
-
|
492 |
-
|
493 |
],
|
494 |
"dtype": "float16",
|
495 |
"format": "f32-to-bf16",
|
@@ -499,8 +499,8 @@
|
|
499 |
{
|
500 |
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
|
501 |
"shape": [
|
502 |
-
|
503 |
-
|
504 |
],
|
505 |
"dtype": "uint32",
|
506 |
"format": "f32-to-bf16",
|
@@ -510,8 +510,8 @@
|
|
510 |
{
|
511 |
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
|
512 |
"shape": [
|
513 |
-
|
514 |
-
|
515 |
],
|
516 |
"dtype": "float16",
|
517 |
"format": "f32-to-bf16",
|
@@ -531,8 +531,8 @@
|
|
531 |
{
|
532 |
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
|
533 |
"shape": [
|
534 |
-
|
535 |
-
|
536 |
],
|
537 |
"dtype": "uint32",
|
538 |
"format": "f32-to-bf16",
|
@@ -542,8 +542,8 @@
|
|
542 |
{
|
543 |
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
|
544 |
"shape": [
|
545 |
-
|
546 |
-
|
547 |
],
|
548 |
"dtype": "float16",
|
549 |
"format": "f32-to-bf16",
|
@@ -553,8 +553,8 @@
|
|
553 |
{
|
554 |
"name": "model.layers.12.self_attn.o_proj.q_weight",
|
555 |
"shape": [
|
556 |
-
|
557 |
-
|
558 |
],
|
559 |
"dtype": "uint32",
|
560 |
"format": "f32-to-bf16",
|
@@ -564,8 +564,8 @@
|
|
564 |
{
|
565 |
"name": "model.layers.12.self_attn.o_proj.q_scale",
|
566 |
"shape": [
|
567 |
-
|
568 |
-
|
569 |
],
|
570 |
"dtype": "float16",
|
571 |
"format": "f32-to-bf16",
|
@@ -585,8 +585,8 @@
|
|
585 |
{
|
586 |
"name": "model.layers.13.mlp.down_proj.q_weight",
|
587 |
"shape": [
|
588 |
-
|
589 |
-
|
590 |
],
|
591 |
"dtype": "uint32",
|
592 |
"format": "f32-to-bf16",
|
@@ -596,8 +596,8 @@
|
|
596 |
{
|
597 |
"name": "model.layers.13.mlp.down_proj.q_scale",
|
598 |
"shape": [
|
599 |
-
|
600 |
-
|
601 |
],
|
602 |
"dtype": "float16",
|
603 |
"format": "f32-to-bf16",
|
@@ -607,8 +607,8 @@
|
|
607 |
{
|
608 |
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
|
609 |
"shape": [
|
610 |
-
|
611 |
-
|
612 |
],
|
613 |
"dtype": "uint32",
|
614 |
"format": "f32-to-bf16",
|
@@ -618,8 +618,8 @@
|
|
618 |
{
|
619 |
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
|
620 |
"shape": [
|
621 |
-
|
622 |
-
|
623 |
],
|
624 |
"dtype": "float16",
|
625 |
"format": "f32-to-bf16",
|
@@ -639,8 +639,8 @@
|
|
639 |
{
|
640 |
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
|
641 |
"shape": [
|
642 |
-
|
643 |
-
|
644 |
],
|
645 |
"dtype": "uint32",
|
646 |
"format": "f32-to-bf16",
|
@@ -650,8 +650,8 @@
|
|
650 |
{
|
651 |
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
|
652 |
"shape": [
|
653 |
-
|
654 |
-
|
655 |
],
|
656 |
"dtype": "float16",
|
657 |
"format": "f32-to-bf16",
|
@@ -661,8 +661,8 @@
|
|
661 |
{
|
662 |
"name": "model.layers.13.self_attn.o_proj.q_weight",
|
663 |
"shape": [
|
664 |
-
|
665 |
-
|
666 |
],
|
667 |
"dtype": "uint32",
|
668 |
"format": "f32-to-bf16",
|
@@ -672,8 +672,8 @@
|
|
672 |
{
|
673 |
"name": "model.layers.13.self_attn.o_proj.q_scale",
|
674 |
"shape": [
|
675 |
-
|
676 |
-
|
677 |
],
|
678 |
"dtype": "float16",
|
679 |
"format": "f32-to-bf16",
|
@@ -693,8 +693,8 @@
|
|
693 |
{
|
694 |
"name": "model.layers.14.mlp.down_proj.q_weight",
|
695 |
"shape": [
|
696 |
-
|
697 |
-
|
698 |
],
|
699 |
"dtype": "uint32",
|
700 |
"format": "f32-to-bf16",
|
@@ -704,8 +704,8 @@
|
|
704 |
{
|
705 |
"name": "model.layers.14.mlp.down_proj.q_scale",
|
706 |
"shape": [
|
707 |
-
|
708 |
-
|
709 |
],
|
710 |
"dtype": "float16",
|
711 |
"format": "f32-to-bf16",
|
@@ -715,8 +715,8 @@
|
|
715 |
{
|
716 |
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
|
717 |
"shape": [
|
718 |
-
|
719 |
-
|
720 |
],
|
721 |
"dtype": "uint32",
|
722 |
"format": "f32-to-bf16",
|
@@ -726,8 +726,8 @@
|
|
726 |
{
|
727 |
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
|
728 |
"shape": [
|
729 |
-
|
730 |
-
|
731 |
],
|
732 |
"dtype": "float16",
|
733 |
"format": "f32-to-bf16",
|
@@ -747,8 +747,8 @@
|
|
747 |
{
|
748 |
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
|
749 |
"shape": [
|
750 |
-
|
751 |
-
|
752 |
],
|
753 |
"dtype": "uint32",
|
754 |
"format": "f32-to-bf16",
|
@@ -758,8 +758,8 @@
|
|
758 |
{
|
759 |
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
|
760 |
"shape": [
|
761 |
-
|
762 |
-
|
763 |
],
|
764 |
"dtype": "float16",
|
765 |
"format": "f32-to-bf16",
|
@@ -769,8 +769,8 @@
|
|
769 |
{
|
770 |
"name": "model.layers.14.self_attn.o_proj.q_weight",
|
771 |
"shape": [
|
772 |
-
|
773 |
-
|
774 |
],
|
775 |
"dtype": "uint32",
|
776 |
"format": "f32-to-bf16",
|
@@ -780,8 +780,8 @@
|
|
780 |
{
|
781 |
"name": "model.layers.14.self_attn.o_proj.q_scale",
|
782 |
"shape": [
|
783 |
-
|
784 |
-
|
785 |
],
|
786 |
"dtype": "float16",
|
787 |
"format": "f32-to-bf16",
|
@@ -801,8 +801,8 @@
|
|
801 |
{
|
802 |
"name": "model.layers.15.mlp.down_proj.q_weight",
|
803 |
"shape": [
|
804 |
-
|
805 |
-
|
806 |
],
|
807 |
"dtype": "uint32",
|
808 |
"format": "f32-to-bf16",
|
@@ -812,8 +812,8 @@
|
|
812 |
{
|
813 |
"name": "model.layers.15.mlp.down_proj.q_scale",
|
814 |
"shape": [
|
815 |
-
|
816 |
-
|
817 |
],
|
818 |
"dtype": "float16",
|
819 |
"format": "f32-to-bf16",
|
@@ -823,8 +823,8 @@
|
|
823 |
{
|
824 |
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
|
825 |
"shape": [
|
826 |
-
|
827 |
-
|
828 |
],
|
829 |
"dtype": "uint32",
|
830 |
"format": "f32-to-bf16",
|
@@ -834,8 +834,8 @@
|
|
834 |
{
|
835 |
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
|
836 |
"shape": [
|
837 |
-
|
838 |
-
|
839 |
],
|
840 |
"dtype": "float16",
|
841 |
"format": "f32-to-bf16",
|
@@ -855,8 +855,8 @@
|
|
855 |
{
|
856 |
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
|
857 |
"shape": [
|
858 |
-
|
859 |
-
|
860 |
],
|
861 |
"dtype": "uint32",
|
862 |
"format": "f32-to-bf16",
|
@@ -866,8 +866,8 @@
|
|
866 |
{
|
867 |
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
|
868 |
"shape": [
|
869 |
-
|
870 |
-
|
871 |
],
|
872 |
"dtype": "float16",
|
873 |
"format": "f32-to-bf16",
|
@@ -877,8 +877,8 @@
|
|
877 |
{
|
878 |
"name": "model.layers.15.self_attn.o_proj.q_weight",
|
879 |
"shape": [
|
880 |
-
|
881 |
-
|
882 |
],
|
883 |
"dtype": "uint32",
|
884 |
"format": "f32-to-bf16",
|
@@ -888,8 +888,8 @@
|
|
888 |
{
|
889 |
"name": "model.layers.15.self_attn.o_proj.q_scale",
|
890 |
"shape": [
|
891 |
-
|
892 |
-
|
893 |
],
|
894 |
"dtype": "float16",
|
895 |
"format": "f32-to-bf16",
|
@@ -909,8 +909,8 @@
|
|
909 |
{
|
910 |
"name": "model.layers.16.mlp.down_proj.q_weight",
|
911 |
"shape": [
|
912 |
-
|
913 |
-
|
914 |
],
|
915 |
"dtype": "uint32",
|
916 |
"format": "f32-to-bf16",
|
@@ -920,8 +920,8 @@
|
|
920 |
{
|
921 |
"name": "model.layers.16.mlp.down_proj.q_scale",
|
922 |
"shape": [
|
923 |
-
|
924 |
-
|
925 |
],
|
926 |
"dtype": "float16",
|
927 |
"format": "f32-to-bf16",
|
@@ -931,8 +931,8 @@
|
|
931 |
{
|
932 |
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
|
933 |
"shape": [
|
934 |
-
|
935 |
-
|
936 |
],
|
937 |
"dtype": "uint32",
|
938 |
"format": "f32-to-bf16",
|
@@ -942,8 +942,8 @@
|
|
942 |
{
|
943 |
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
|
944 |
"shape": [
|
945 |
-
|
946 |
-
|
947 |
],
|
948 |
"dtype": "float16",
|
949 |
"format": "f32-to-bf16",
|
@@ -961,7 +961,7 @@
|
|
961 |
"byteOffset": 33363396
|
962 |
}
|
963 |
],
|
964 |
-
"md5sum": "
|
965 |
},
|
966 |
{
|
967 |
"dataPath": "params_shard_1.bin",
|
@@ -971,8 +971,8 @@
|
|
971 |
{
|
972 |
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
|
973 |
"shape": [
|
974 |
-
|
975 |
-
|
976 |
],
|
977 |
"dtype": "uint32",
|
978 |
"format": "f32-to-bf16",
|
@@ -982,8 +982,8 @@
|
|
982 |
{
|
983 |
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
|
984 |
"shape": [
|
985 |
-
|
986 |
-
|
987 |
],
|
988 |
"dtype": "float16",
|
989 |
"format": "f32-to-bf16",
|
@@ -993,8 +993,8 @@
|
|
993 |
{
|
994 |
"name": "model.layers.16.self_attn.o_proj.q_weight",
|
995 |
"shape": [
|
996 |
-
|
997 |
-
|
998 |
],
|
999 |
"dtype": "uint32",
|
1000 |
"format": "f32-to-bf16",
|
@@ -1004,8 +1004,8 @@
|
|
1004 |
{
|
1005 |
"name": "model.layers.16.self_attn.o_proj.q_scale",
|
1006 |
"shape": [
|
1007 |
-
|
1008 |
-
|
1009 |
],
|
1010 |
"dtype": "float16",
|
1011 |
"format": "f32-to-bf16",
|
@@ -1025,8 +1025,8 @@
|
|
1025 |
{
|
1026 |
"name": "model.layers.17.mlp.down_proj.q_weight",
|
1027 |
"shape": [
|
1028 |
-
|
1029 |
-
|
1030 |
],
|
1031 |
"dtype": "uint32",
|
1032 |
"format": "f32-to-bf16",
|
@@ -1036,8 +1036,8 @@
|
|
1036 |
{
|
1037 |
"name": "model.layers.17.mlp.down_proj.q_scale",
|
1038 |
"shape": [
|
1039 |
-
|
1040 |
-
|
1041 |
],
|
1042 |
"dtype": "float16",
|
1043 |
"format": "f32-to-bf16",
|
@@ -1047,8 +1047,8 @@
|
|
1047 |
{
|
1048 |
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
|
1049 |
"shape": [
|
1050 |
-
|
1051 |
-
|
1052 |
],
|
1053 |
"dtype": "uint32",
|
1054 |
"format": "f32-to-bf16",
|
@@ -1058,8 +1058,8 @@
|
|
1058 |
{
|
1059 |
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
|
1060 |
"shape": [
|
1061 |
-
|
1062 |
-
|
1063 |
],
|
1064 |
"dtype": "float16",
|
1065 |
"format": "f32-to-bf16",
|
@@ -1079,8 +1079,8 @@
|
|
1079 |
{
|
1080 |
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
|
1081 |
"shape": [
|
1082 |
-
|
1083 |
-
|
1084 |
],
|
1085 |
"dtype": "uint32",
|
1086 |
"format": "f32-to-bf16",
|
@@ -1090,8 +1090,8 @@
|
|
1090 |
{
|
1091 |
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
|
1092 |
"shape": [
|
1093 |
-
|
1094 |
-
|
1095 |
],
|
1096 |
"dtype": "float16",
|
1097 |
"format": "f32-to-bf16",
|
@@ -1101,8 +1101,8 @@
|
|
1101 |
{
|
1102 |
"name": "model.layers.17.self_attn.o_proj.q_weight",
|
1103 |
"shape": [
|
1104 |
-
|
1105 |
-
|
1106 |
],
|
1107 |
"dtype": "uint32",
|
1108 |
"format": "f32-to-bf16",
|
@@ -1112,8 +1112,8 @@
|
|
1112 |
{
|
1113 |
"name": "model.layers.17.self_attn.o_proj.q_scale",
|
1114 |
"shape": [
|
1115 |
-
|
1116 |
-
|
1117 |
],
|
1118 |
"dtype": "float16",
|
1119 |
"format": "f32-to-bf16",
|
@@ -1133,8 +1133,8 @@
|
|
1133 |
{
|
1134 |
"name": "model.layers.18.mlp.down_proj.q_weight",
|
1135 |
"shape": [
|
1136 |
-
|
1137 |
-
|
1138 |
],
|
1139 |
"dtype": "uint32",
|
1140 |
"format": "f32-to-bf16",
|
@@ -1144,8 +1144,8 @@
|
|
1144 |
{
|
1145 |
"name": "model.layers.18.mlp.down_proj.q_scale",
|
1146 |
"shape": [
|
1147 |
-
|
1148 |
-
|
1149 |
],
|
1150 |
"dtype": "float16",
|
1151 |
"format": "f32-to-bf16",
|
@@ -1155,8 +1155,8 @@
|
|
1155 |
{
|
1156 |
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
|
1157 |
"shape": [
|
1158 |
-
|
1159 |
-
|
1160 |
],
|
1161 |
"dtype": "uint32",
|
1162 |
"format": "f32-to-bf16",
|
@@ -1166,8 +1166,8 @@
|
|
1166 |
{
|
1167 |
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
|
1168 |
"shape": [
|
1169 |
-
|
1170 |
-
|
1171 |
],
|
1172 |
"dtype": "float16",
|
1173 |
"format": "f32-to-bf16",
|
@@ -1187,8 +1187,8 @@
|
|
1187 |
{
|
1188 |
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
|
1189 |
"shape": [
|
1190 |
-
|
1191 |
-
|
1192 |
],
|
1193 |
"dtype": "uint32",
|
1194 |
"format": "f32-to-bf16",
|
@@ -1198,8 +1198,8 @@
|
|
1198 |
{
|
1199 |
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
|
1200 |
"shape": [
|
1201 |
-
|
1202 |
-
|
1203 |
],
|
1204 |
"dtype": "float16",
|
1205 |
"format": "f32-to-bf16",
|
@@ -1209,8 +1209,8 @@
|
|
1209 |
{
|
1210 |
"name": "model.layers.18.self_attn.o_proj.q_weight",
|
1211 |
"shape": [
|
1212 |
-
|
1213 |
-
|
1214 |
],
|
1215 |
"dtype": "uint32",
|
1216 |
"format": "f32-to-bf16",
|
@@ -1220,8 +1220,8 @@
|
|
1220 |
{
|
1221 |
"name": "model.layers.18.self_attn.o_proj.q_scale",
|
1222 |
"shape": [
|
1223 |
-
|
1224 |
-
|
1225 |
],
|
1226 |
"dtype": "float16",
|
1227 |
"format": "f32-to-bf16",
|
@@ -1241,8 +1241,8 @@
|
|
1241 |
{
|
1242 |
"name": "model.layers.19.mlp.down_proj.q_weight",
|
1243 |
"shape": [
|
1244 |
-
|
1245 |
-
|
1246 |
],
|
1247 |
"dtype": "uint32",
|
1248 |
"format": "f32-to-bf16",
|
@@ -1252,8 +1252,8 @@
|
|
1252 |
{
|
1253 |
"name": "model.layers.19.mlp.down_proj.q_scale",
|
1254 |
"shape": [
|
1255 |
-
|
1256 |
-
|
1257 |
],
|
1258 |
"dtype": "float16",
|
1259 |
"format": "f32-to-bf16",
|
@@ -1263,8 +1263,8 @@
|
|
1263 |
{
|
1264 |
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
|
1265 |
"shape": [
|
1266 |
-
|
1267 |
-
|
1268 |
],
|
1269 |
"dtype": "uint32",
|
1270 |
"format": "f32-to-bf16",
|
@@ -1274,8 +1274,8 @@
|
|
1274 |
{
|
1275 |
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
|
1276 |
"shape": [
|
1277 |
-
|
1278 |
-
|
1279 |
],
|
1280 |
"dtype": "float16",
|
1281 |
"format": "f32-to-bf16",
|
@@ -1295,8 +1295,8 @@
|
|
1295 |
{
|
1296 |
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
|
1297 |
"shape": [
|
1298 |
-
|
1299 |
-
|
1300 |
],
|
1301 |
"dtype": "uint32",
|
1302 |
"format": "f32-to-bf16",
|
@@ -1306,8 +1306,8 @@
|
|
1306 |
{
|
1307 |
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
|
1308 |
"shape": [
|
1309 |
-
|
1310 |
-
|
1311 |
],
|
1312 |
"dtype": "float16",
|
1313 |
"format": "f32-to-bf16",
|
@@ -1317,8 +1317,8 @@
|
|
1317 |
{
|
1318 |
"name": "model.layers.19.self_attn.o_proj.q_weight",
|
1319 |
"shape": [
|
1320 |
-
|
1321 |
-
|
1322 |
],
|
1323 |
"dtype": "uint32",
|
1324 |
"format": "f32-to-bf16",
|
@@ -1328,8 +1328,8 @@
|
|
1328 |
{
|
1329 |
"name": "model.layers.19.self_attn.o_proj.q_scale",
|
1330 |
"shape": [
|
1331 |
-
|
1332 |
-
|
1333 |
],
|
1334 |
"dtype": "float16",
|
1335 |
"format": "f32-to-bf16",
|
@@ -1349,8 +1349,8 @@
|
|
1349 |
{
|
1350 |
"name": "model.layers.2.mlp.down_proj.q_weight",
|
1351 |
"shape": [
|
1352 |
-
|
1353 |
-
|
1354 |
],
|
1355 |
"dtype": "uint32",
|
1356 |
"format": "f32-to-bf16",
|
@@ -1360,8 +1360,8 @@
|
|
1360 |
{
|
1361 |
"name": "model.layers.2.mlp.down_proj.q_scale",
|
1362 |
"shape": [
|
1363 |
-
|
1364 |
-
|
1365 |
],
|
1366 |
"dtype": "float16",
|
1367 |
"format": "f32-to-bf16",
|
@@ -1371,8 +1371,8 @@
|
|
1371 |
{
|
1372 |
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
|
1373 |
"shape": [
|
1374 |
-
|
1375 |
-
|
1376 |
],
|
1377 |
"dtype": "uint32",
|
1378 |
"format": "f32-to-bf16",
|
@@ -1382,8 +1382,8 @@
|
|
1382 |
{
|
1383 |
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
|
1384 |
"shape": [
|
1385 |
-
|
1386 |
-
|
1387 |
],
|
1388 |
"dtype": "float16",
|
1389 |
"format": "f32-to-bf16",
|
@@ -1403,8 +1403,8 @@
|
|
1403 |
{
|
1404 |
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
|
1405 |
"shape": [
|
1406 |
-
|
1407 |
-
|
1408 |
],
|
1409 |
"dtype": "uint32",
|
1410 |
"format": "f32-to-bf16",
|
@@ -1414,8 +1414,8 @@
|
|
1414 |
{
|
1415 |
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
|
1416 |
"shape": [
|
1417 |
-
|
1418 |
-
|
1419 |
],
|
1420 |
"dtype": "float16",
|
1421 |
"format": "f32-to-bf16",
|
@@ -1425,8 +1425,8 @@
|
|
1425 |
{
|
1426 |
"name": "model.layers.2.self_attn.o_proj.q_weight",
|
1427 |
"shape": [
|
1428 |
-
|
1429 |
-
|
1430 |
],
|
1431 |
"dtype": "uint32",
|
1432 |
"format": "f32-to-bf16",
|
@@ -1436,8 +1436,8 @@
|
|
1436 |
{
|
1437 |
"name": "model.layers.2.self_attn.o_proj.q_scale",
|
1438 |
"shape": [
|
1439 |
-
|
1440 |
-
|
1441 |
],
|
1442 |
"dtype": "float16",
|
1443 |
"format": "f32-to-bf16",
|
@@ -1457,8 +1457,8 @@
|
|
1457 |
{
|
1458 |
"name": "model.layers.20.mlp.down_proj.q_weight",
|
1459 |
"shape": [
|
1460 |
-
|
1461 |
-
|
1462 |
],
|
1463 |
"dtype": "uint32",
|
1464 |
"format": "f32-to-bf16",
|
@@ -1468,8 +1468,8 @@
|
|
1468 |
{
|
1469 |
"name": "model.layers.20.mlp.down_proj.q_scale",
|
1470 |
"shape": [
|
1471 |
-
|
1472 |
-
|
1473 |
],
|
1474 |
"dtype": "float16",
|
1475 |
"format": "f32-to-bf16",
|
@@ -1479,8 +1479,8 @@
|
|
1479 |
{
|
1480 |
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
|
1481 |
"shape": [
|
1482 |
-
|
1483 |
-
|
1484 |
],
|
1485 |
"dtype": "uint32",
|
1486 |
"format": "f32-to-bf16",
|
@@ -1490,8 +1490,8 @@
|
|
1490 |
{
|
1491 |
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
|
1492 |
"shape": [
|
1493 |
-
|
1494 |
-
|
1495 |
],
|
1496 |
"dtype": "float16",
|
1497 |
"format": "f32-to-bf16",
|
@@ -1511,8 +1511,8 @@
|
|
1511 |
{
|
1512 |
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
|
1513 |
"shape": [
|
1514 |
-
|
1515 |
-
|
1516 |
],
|
1517 |
"dtype": "uint32",
|
1518 |
"format": "f32-to-bf16",
|
@@ -1522,8 +1522,8 @@
|
|
1522 |
{
|
1523 |
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
|
1524 |
"shape": [
|
1525 |
-
|
1526 |
-
|
1527 |
],
|
1528 |
"dtype": "float16",
|
1529 |
"format": "f32-to-bf16",
|
@@ -1533,8 +1533,8 @@
|
|
1533 |
{
|
1534 |
"name": "model.layers.20.self_attn.o_proj.q_weight",
|
1535 |
"shape": [
|
1536 |
-
|
1537 |
-
|
1538 |
],
|
1539 |
"dtype": "uint32",
|
1540 |
"format": "f32-to-bf16",
|
@@ -1544,8 +1544,8 @@
|
|
1544 |
{
|
1545 |
"name": "model.layers.20.self_attn.o_proj.q_scale",
|
1546 |
"shape": [
|
1547 |
-
|
1548 |
-
|
1549 |
],
|
1550 |
"dtype": "float16",
|
1551 |
"format": "f32-to-bf16",
|
@@ -1565,8 +1565,8 @@
|
|
1565 |
{
|
1566 |
"name": "model.layers.21.mlp.down_proj.q_weight",
|
1567 |
"shape": [
|
1568 |
-
|
1569 |
-
|
1570 |
],
|
1571 |
"dtype": "uint32",
|
1572 |
"format": "f32-to-bf16",
|
@@ -1576,8 +1576,8 @@
|
|
1576 |
{
|
1577 |
"name": "model.layers.21.mlp.down_proj.q_scale",
|
1578 |
"shape": [
|
1579 |
-
|
1580 |
-
|
1581 |
],
|
1582 |
"dtype": "float16",
|
1583 |
"format": "f32-to-bf16",
|
@@ -1587,8 +1587,8 @@
|
|
1587 |
{
|
1588 |
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
|
1589 |
"shape": [
|
1590 |
-
|
1591 |
-
|
1592 |
],
|
1593 |
"dtype": "uint32",
|
1594 |
"format": "f32-to-bf16",
|
@@ -1598,8 +1598,8 @@
|
|
1598 |
{
|
1599 |
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
|
1600 |
"shape": [
|
1601 |
-
|
1602 |
-
|
1603 |
],
|
1604 |
"dtype": "float16",
|
1605 |
"format": "f32-to-bf16",
|
@@ -1619,8 +1619,8 @@
|
|
1619 |
{
|
1620 |
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
|
1621 |
"shape": [
|
1622 |
-
|
1623 |
-
|
1624 |
],
|
1625 |
"dtype": "uint32",
|
1626 |
"format": "f32-to-bf16",
|
@@ -1630,8 +1630,8 @@
|
|
1630 |
{
|
1631 |
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
|
1632 |
"shape": [
|
1633 |
-
|
1634 |
-
|
1635 |
],
|
1636 |
"dtype": "float16",
|
1637 |
"format": "f32-to-bf16",
|
@@ -1641,8 +1641,8 @@
|
|
1641 |
{
|
1642 |
"name": "model.layers.21.self_attn.o_proj.q_weight",
|
1643 |
"shape": [
|
1644 |
-
|
1645 |
-
|
1646 |
],
|
1647 |
"dtype": "uint32",
|
1648 |
"format": "f32-to-bf16",
|
@@ -1652,8 +1652,8 @@
|
|
1652 |
{
|
1653 |
"name": "model.layers.21.self_attn.o_proj.q_scale",
|
1654 |
"shape": [
|
1655 |
-
|
1656 |
-
|
1657 |
],
|
1658 |
"dtype": "float16",
|
1659 |
"format": "f32-to-bf16",
|
@@ -1673,8 +1673,8 @@
|
|
1673 |
{
|
1674 |
"name": "model.layers.22.mlp.down_proj.q_weight",
|
1675 |
"shape": [
|
1676 |
-
|
1677 |
-
|
1678 |
],
|
1679 |
"dtype": "uint32",
|
1680 |
"format": "f32-to-bf16",
|
@@ -1684,8 +1684,8 @@
|
|
1684 |
{
|
1685 |
"name": "model.layers.22.mlp.down_proj.q_scale",
|
1686 |
"shape": [
|
1687 |
-
|
1688 |
-
|
1689 |
],
|
1690 |
"dtype": "float16",
|
1691 |
"format": "f32-to-bf16",
|
@@ -1695,8 +1695,8 @@
|
|
1695 |
{
|
1696 |
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
|
1697 |
"shape": [
|
1698 |
-
|
1699 |
-
|
1700 |
],
|
1701 |
"dtype": "uint32",
|
1702 |
"format": "f32-to-bf16",
|
@@ -1706,8 +1706,8 @@
|
|
1706 |
{
|
1707 |
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
|
1708 |
"shape": [
|
1709 |
-
|
1710 |
-
|
1711 |
],
|
1712 |
"dtype": "float16",
|
1713 |
"format": "f32-to-bf16",
|
@@ -1727,8 +1727,8 @@
|
|
1727 |
{
|
1728 |
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
|
1729 |
"shape": [
|
1730 |
-
|
1731 |
-
|
1732 |
],
|
1733 |
"dtype": "uint32",
|
1734 |
"format": "f32-to-bf16",
|
@@ -1738,8 +1738,8 @@
|
|
1738 |
{
|
1739 |
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
|
1740 |
"shape": [
|
1741 |
-
|
1742 |
-
|
1743 |
],
|
1744 |
"dtype": "float16",
|
1745 |
"format": "f32-to-bf16",
|
@@ -1749,8 +1749,8 @@
|
|
1749 |
{
|
1750 |
"name": "model.layers.22.self_attn.o_proj.q_weight",
|
1751 |
"shape": [
|
1752 |
-
|
1753 |
-
|
1754 |
],
|
1755 |
"dtype": "uint32",
|
1756 |
"format": "f32-to-bf16",
|
@@ -1760,8 +1760,8 @@
|
|
1760 |
{
|
1761 |
"name": "model.layers.22.self_attn.o_proj.q_scale",
|
1762 |
"shape": [
|
1763 |
-
|
1764 |
-
|
1765 |
],
|
1766 |
"dtype": "float16",
|
1767 |
"format": "f32-to-bf16",
|
@@ -1781,8 +1781,8 @@
|
|
1781 |
{
|
1782 |
"name": "model.layers.23.mlp.down_proj.q_weight",
|
1783 |
"shape": [
|
1784 |
-
|
1785 |
-
|
1786 |
],
|
1787 |
"dtype": "uint32",
|
1788 |
"format": "f32-to-bf16",
|
@@ -1792,8 +1792,8 @@
|
|
1792 |
{
|
1793 |
"name": "model.layers.23.mlp.down_proj.q_scale",
|
1794 |
"shape": [
|
1795 |
-
|
1796 |
-
|
1797 |
],
|
1798 |
"dtype": "float16",
|
1799 |
"format": "f32-to-bf16",
|
@@ -1803,8 +1803,8 @@
|
|
1803 |
{
|
1804 |
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
|
1805 |
"shape": [
|
1806 |
-
|
1807 |
-
|
1808 |
],
|
1809 |
"dtype": "uint32",
|
1810 |
"format": "f32-to-bf16",
|
@@ -1814,8 +1814,8 @@
|
|
1814 |
{
|
1815 |
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
|
1816 |
"shape": [
|
1817 |
-
|
1818 |
-
|
1819 |
],
|
1820 |
"dtype": "float16",
|
1821 |
"format": "f32-to-bf16",
|
@@ -1835,8 +1835,8 @@
|
|
1835 |
{
|
1836 |
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
|
1837 |
"shape": [
|
1838 |
-
|
1839 |
-
|
1840 |
],
|
1841 |
"dtype": "uint32",
|
1842 |
"format": "f32-to-bf16",
|
@@ -1846,8 +1846,8 @@
|
|
1846 |
{
|
1847 |
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
|
1848 |
"shape": [
|
1849 |
-
|
1850 |
-
|
1851 |
],
|
1852 |
"dtype": "float16",
|
1853 |
"format": "f32-to-bf16",
|
@@ -1857,8 +1857,8 @@
|
|
1857 |
{
|
1858 |
"name": "model.layers.23.self_attn.o_proj.q_weight",
|
1859 |
"shape": [
|
1860 |
-
|
1861 |
-
|
1862 |
],
|
1863 |
"dtype": "uint32",
|
1864 |
"format": "f32-to-bf16",
|
@@ -1868,8 +1868,8 @@
|
|
1868 |
{
|
1869 |
"name": "model.layers.23.self_attn.o_proj.q_scale",
|
1870 |
"shape": [
|
1871 |
-
|
1872 |
-
|
1873 |
],
|
1874 |
"dtype": "float16",
|
1875 |
"format": "f32-to-bf16",
|
@@ -1889,8 +1889,8 @@
|
|
1889 |
{
|
1890 |
"name": "model.layers.24.mlp.down_proj.q_weight",
|
1891 |
"shape": [
|
1892 |
-
|
1893 |
-
|
1894 |
],
|
1895 |
"dtype": "uint32",
|
1896 |
"format": "f32-to-bf16",
|
@@ -1900,8 +1900,8 @@
|
|
1900 |
{
|
1901 |
"name": "model.layers.24.mlp.down_proj.q_scale",
|
1902 |
"shape": [
|
1903 |
-
|
1904 |
-
|
1905 |
],
|
1906 |
"dtype": "float16",
|
1907 |
"format": "f32-to-bf16",
|
@@ -1911,8 +1911,8 @@
|
|
1911 |
{
|
1912 |
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
|
1913 |
"shape": [
|
1914 |
-
|
1915 |
-
|
1916 |
],
|
1917 |
"dtype": "uint32",
|
1918 |
"format": "f32-to-bf16",
|
@@ -1922,8 +1922,8 @@
|
|
1922 |
{
|
1923 |
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
|
1924 |
"shape": [
|
1925 |
-
|
1926 |
-
|
1927 |
],
|
1928 |
"dtype": "float16",
|
1929 |
"format": "f32-to-bf16",
|
@@ -1943,8 +1943,8 @@
|
|
1943 |
{
|
1944 |
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
|
1945 |
"shape": [
|
1946 |
-
|
1947 |
-
|
1948 |
],
|
1949 |
"dtype": "uint32",
|
1950 |
"format": "f32-to-bf16",
|
@@ -1954,8 +1954,8 @@
|
|
1954 |
{
|
1955 |
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
|
1956 |
"shape": [
|
1957 |
-
|
1958 |
-
|
1959 |
],
|
1960 |
"dtype": "float16",
|
1961 |
"format": "f32-to-bf16",
|
@@ -1965,8 +1965,8 @@
|
|
1965 |
{
|
1966 |
"name": "model.layers.24.self_attn.o_proj.q_weight",
|
1967 |
"shape": [
|
1968 |
-
|
1969 |
-
|
1970 |
],
|
1971 |
"dtype": "uint32",
|
1972 |
"format": "f32-to-bf16",
|
@@ -1976,8 +1976,8 @@
|
|
1976 |
{
|
1977 |
"name": "model.layers.24.self_attn.o_proj.q_scale",
|
1978 |
"shape": [
|
1979 |
-
|
1980 |
-
|
1981 |
],
|
1982 |
"dtype": "float16",
|
1983 |
"format": "f32-to-bf16",
|
@@ -1997,8 +1997,8 @@
|
|
1997 |
{
|
1998 |
"name": "model.layers.25.mlp.down_proj.q_weight",
|
1999 |
"shape": [
|
2000 |
-
|
2001 |
-
|
2002 |
],
|
2003 |
"dtype": "uint32",
|
2004 |
"format": "f32-to-bf16",
|
@@ -2008,8 +2008,8 @@
|
|
2008 |
{
|
2009 |
"name": "model.layers.25.mlp.down_proj.q_scale",
|
2010 |
"shape": [
|
2011 |
-
|
2012 |
-
|
2013 |
],
|
2014 |
"dtype": "float16",
|
2015 |
"format": "f32-to-bf16",
|
@@ -2019,8 +2019,8 @@
|
|
2019 |
{
|
2020 |
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
|
2021 |
"shape": [
|
2022 |
-
|
2023 |
-
|
2024 |
],
|
2025 |
"dtype": "uint32",
|
2026 |
"format": "f32-to-bf16",
|
@@ -2030,8 +2030,8 @@
|
|
2030 |
{
|
2031 |
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
|
2032 |
"shape": [
|
2033 |
-
|
2034 |
-
|
2035 |
],
|
2036 |
"dtype": "float16",
|
2037 |
"format": "f32-to-bf16",
|
@@ -2051,8 +2051,8 @@
|
|
2051 |
{
|
2052 |
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
|
2053 |
"shape": [
|
2054 |
-
|
2055 |
-
|
2056 |
],
|
2057 |
"dtype": "uint32",
|
2058 |
"format": "f32-to-bf16",
|
@@ -2062,8 +2062,8 @@
|
|
2062 |
{
|
2063 |
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
|
2064 |
"shape": [
|
2065 |
-
|
2066 |
-
|
2067 |
],
|
2068 |
"dtype": "float16",
|
2069 |
"format": "f32-to-bf16",
|
@@ -2073,8 +2073,8 @@
|
|
2073 |
{
|
2074 |
"name": "model.layers.25.self_attn.o_proj.q_weight",
|
2075 |
"shape": [
|
2076 |
-
|
2077 |
-
|
2078 |
],
|
2079 |
"dtype": "uint32",
|
2080 |
"format": "f32-to-bf16",
|
@@ -2084,8 +2084,8 @@
|
|
2084 |
{
|
2085 |
"name": "model.layers.25.self_attn.o_proj.q_scale",
|
2086 |
"shape": [
|
2087 |
-
|
2088 |
-
|
2089 |
],
|
2090 |
"dtype": "float16",
|
2091 |
"format": "f32-to-bf16",
|
@@ -2105,8 +2105,8 @@
|
|
2105 |
{
|
2106 |
"name": "model.layers.26.mlp.down_proj.q_weight",
|
2107 |
"shape": [
|
2108 |
-
|
2109 |
-
|
2110 |
],
|
2111 |
"dtype": "uint32",
|
2112 |
"format": "f32-to-bf16",
|
@@ -2116,8 +2116,8 @@
|
|
2116 |
{
|
2117 |
"name": "model.layers.26.mlp.down_proj.q_scale",
|
2118 |
"shape": [
|
2119 |
-
|
2120 |
-
|
2121 |
],
|
2122 |
"dtype": "float16",
|
2123 |
"format": "f32-to-bf16",
|
@@ -2127,8 +2127,8 @@
|
|
2127 |
{
|
2128 |
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
|
2129 |
"shape": [
|
2130 |
-
|
2131 |
-
|
2132 |
],
|
2133 |
"dtype": "uint32",
|
2134 |
"format": "f32-to-bf16",
|
@@ -2138,8 +2138,8 @@
|
|
2138 |
{
|
2139 |
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
|
2140 |
"shape": [
|
2141 |
-
|
2142 |
-
|
2143 |
],
|
2144 |
"dtype": "float16",
|
2145 |
"format": "f32-to-bf16",
|
@@ -2159,8 +2159,8 @@
|
|
2159 |
{
|
2160 |
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
|
2161 |
"shape": [
|
2162 |
-
|
2163 |
-
|
2164 |
],
|
2165 |
"dtype": "uint32",
|
2166 |
"format": "f32-to-bf16",
|
@@ -2170,8 +2170,8 @@
|
|
2170 |
{
|
2171 |
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
|
2172 |
"shape": [
|
2173 |
-
|
2174 |
-
|
2175 |
],
|
2176 |
"dtype": "float16",
|
2177 |
"format": "f32-to-bf16",
|
@@ -2181,8 +2181,8 @@
|
|
2181 |
{
|
2182 |
"name": "model.layers.26.self_attn.o_proj.q_weight",
|
2183 |
"shape": [
|
2184 |
-
|
2185 |
-
|
2186 |
],
|
2187 |
"dtype": "uint32",
|
2188 |
"format": "f32-to-bf16",
|
@@ -2192,8 +2192,8 @@
|
|
2192 |
{
|
2193 |
"name": "model.layers.26.self_attn.o_proj.q_scale",
|
2194 |
"shape": [
|
2195 |
-
|
2196 |
-
|
2197 |
],
|
2198 |
"dtype": "float16",
|
2199 |
"format": "f32-to-bf16",
|
@@ -2213,8 +2213,8 @@
|
|
2213 |
{
|
2214 |
"name": "model.layers.27.mlp.down_proj.q_weight",
|
2215 |
"shape": [
|
2216 |
-
|
2217 |
-
|
2218 |
],
|
2219 |
"dtype": "uint32",
|
2220 |
"format": "f32-to-bf16",
|
@@ -2224,8 +2224,8 @@
|
|
2224 |
{
|
2225 |
"name": "model.layers.27.mlp.down_proj.q_scale",
|
2226 |
"shape": [
|
2227 |
-
|
2228 |
-
|
2229 |
],
|
2230 |
"dtype": "float16",
|
2231 |
"format": "f32-to-bf16",
|
@@ -2235,8 +2235,8 @@
|
|
2235 |
{
|
2236 |
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
|
2237 |
"shape": [
|
2238 |
-
|
2239 |
-
|
2240 |
],
|
2241 |
"dtype": "uint32",
|
2242 |
"format": "f32-to-bf16",
|
@@ -2246,8 +2246,8 @@
|
|
2246 |
{
|
2247 |
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
|
2248 |
"shape": [
|
2249 |
-
|
2250 |
-
|
2251 |
],
|
2252 |
"dtype": "float16",
|
2253 |
"format": "f32-to-bf16",
|
@@ -2267,8 +2267,8 @@
|
|
2267 |
{
|
2268 |
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
|
2269 |
"shape": [
|
2270 |
-
|
2271 |
-
|
2272 |
],
|
2273 |
"dtype": "uint32",
|
2274 |
"format": "f32-to-bf16",
|
@@ -2278,8 +2278,8 @@
|
|
2278 |
{
|
2279 |
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
|
2280 |
"shape": [
|
2281 |
-
|
2282 |
-
|
2283 |
],
|
2284 |
"dtype": "float16",
|
2285 |
"format": "f32-to-bf16",
|
@@ -2289,8 +2289,8 @@
|
|
2289 |
{
|
2290 |
"name": "model.layers.27.self_attn.o_proj.q_weight",
|
2291 |
"shape": [
|
2292 |
-
|
2293 |
-
|
2294 |
],
|
2295 |
"dtype": "uint32",
|
2296 |
"format": "f32-to-bf16",
|
@@ -2300,8 +2300,8 @@
|
|
2300 |
{
|
2301 |
"name": "model.layers.27.self_attn.o_proj.q_scale",
|
2302 |
"shape": [
|
2303 |
-
|
2304 |
-
|
2305 |
],
|
2306 |
"dtype": "float16",
|
2307 |
"format": "f32-to-bf16",
|
@@ -2321,8 +2321,8 @@
|
|
2321 |
{
|
2322 |
"name": "model.layers.28.mlp.down_proj.q_weight",
|
2323 |
"shape": [
|
2324 |
-
|
2325 |
-
|
2326 |
],
|
2327 |
"dtype": "uint32",
|
2328 |
"format": "f32-to-bf16",
|
@@ -2332,8 +2332,8 @@
|
|
2332 |
{
|
2333 |
"name": "model.layers.28.mlp.down_proj.q_scale",
|
2334 |
"shape": [
|
2335 |
-
|
2336 |
-
|
2337 |
],
|
2338 |
"dtype": "float16",
|
2339 |
"format": "f32-to-bf16",
|
@@ -2343,8 +2343,8 @@
|
|
2343 |
{
|
2344 |
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
|
2345 |
"shape": [
|
2346 |
-
|
2347 |
-
|
2348 |
],
|
2349 |
"dtype": "uint32",
|
2350 |
"format": "f32-to-bf16",
|
@@ -2354,8 +2354,8 @@
|
|
2354 |
{
|
2355 |
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
|
2356 |
"shape": [
|
2357 |
-
|
2358 |
-
|
2359 |
],
|
2360 |
"dtype": "float16",
|
2361 |
"format": "f32-to-bf16",
|
@@ -2375,8 +2375,8 @@
|
|
2375 |
{
|
2376 |
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
|
2377 |
"shape": [
|
2378 |
-
|
2379 |
-
|
2380 |
],
|
2381 |
"dtype": "uint32",
|
2382 |
"format": "f32-to-bf16",
|
@@ -2386,8 +2386,8 @@
|
|
2386 |
{
|
2387 |
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
|
2388 |
"shape": [
|
2389 |
-
|
2390 |
-
|
2391 |
],
|
2392 |
"dtype": "float16",
|
2393 |
"format": "f32-to-bf16",
|
@@ -2397,8 +2397,8 @@
|
|
2397 |
{
|
2398 |
"name": "model.layers.28.self_attn.o_proj.q_weight",
|
2399 |
"shape": [
|
2400 |
-
|
2401 |
-
|
2402 |
],
|
2403 |
"dtype": "uint32",
|
2404 |
"format": "f32-to-bf16",
|
@@ -2408,8 +2408,8 @@
|
|
2408 |
{
|
2409 |
"name": "model.layers.28.self_attn.o_proj.q_scale",
|
2410 |
"shape": [
|
2411 |
-
|
2412 |
-
|
2413 |
],
|
2414 |
"dtype": "float16",
|
2415 |
"format": "f32-to-bf16",
|
@@ -2429,8 +2429,8 @@
|
|
2429 |
{
|
2430 |
"name": "model.layers.29.mlp.down_proj.q_weight",
|
2431 |
"shape": [
|
2432 |
-
|
2433 |
-
|
2434 |
],
|
2435 |
"dtype": "uint32",
|
2436 |
"format": "f32-to-bf16",
|
@@ -2440,8 +2440,8 @@
|
|
2440 |
{
|
2441 |
"name": "model.layers.29.mlp.down_proj.q_scale",
|
2442 |
"shape": [
|
2443 |
-
|
2444 |
-
|
2445 |
],
|
2446 |
"dtype": "float16",
|
2447 |
"format": "f32-to-bf16",
|
@@ -2451,8 +2451,8 @@
|
|
2451 |
{
|
2452 |
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
|
2453 |
"shape": [
|
2454 |
-
|
2455 |
-
|
2456 |
],
|
2457 |
"dtype": "uint32",
|
2458 |
"format": "f32-to-bf16",
|
@@ -2462,8 +2462,8 @@
|
|
2462 |
{
|
2463 |
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
|
2464 |
"shape": [
|
2465 |
-
|
2466 |
-
|
2467 |
],
|
2468 |
"dtype": "float16",
|
2469 |
"format": "f32-to-bf16",
|
@@ -2483,8 +2483,8 @@
|
|
2483 |
{
|
2484 |
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
|
2485 |
"shape": [
|
2486 |
-
|
2487 |
-
|
2488 |
],
|
2489 |
"dtype": "uint32",
|
2490 |
"format": "f32-to-bf16",
|
@@ -2494,8 +2494,8 @@
|
|
2494 |
{
|
2495 |
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
|
2496 |
"shape": [
|
2497 |
-
|
2498 |
-
|
2499 |
],
|
2500 |
"dtype": "float16",
|
2501 |
"format": "f32-to-bf16",
|
@@ -2505,8 +2505,8 @@
|
|
2505 |
{
|
2506 |
"name": "model.layers.29.self_attn.o_proj.q_weight",
|
2507 |
"shape": [
|
2508 |
-
|
2509 |
-
|
2510 |
],
|
2511 |
"dtype": "uint32",
|
2512 |
"format": "f32-to-bf16",
|
@@ -2516,8 +2516,8 @@
|
|
2516 |
{
|
2517 |
"name": "model.layers.29.self_attn.o_proj.q_scale",
|
2518 |
"shape": [
|
2519 |
-
|
2520 |
-
|
2521 |
],
|
2522 |
"dtype": "float16",
|
2523 |
"format": "f32-to-bf16",
|
@@ -2537,8 +2537,8 @@
|
|
2537 |
{
|
2538 |
"name": "model.layers.3.mlp.down_proj.q_weight",
|
2539 |
"shape": [
|
2540 |
-
|
2541 |
-
|
2542 |
],
|
2543 |
"dtype": "uint32",
|
2544 |
"format": "f32-to-bf16",
|
@@ -2548,8 +2548,8 @@
|
|
2548 |
{
|
2549 |
"name": "model.layers.3.mlp.down_proj.q_scale",
|
2550 |
"shape": [
|
2551 |
-
|
2552 |
-
|
2553 |
],
|
2554 |
"dtype": "float16",
|
2555 |
"format": "f32-to-bf16",
|
@@ -2559,8 +2559,8 @@
|
|
2559 |
{
|
2560 |
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
|
2561 |
"shape": [
|
2562 |
-
|
2563 |
-
|
2564 |
],
|
2565 |
"dtype": "uint32",
|
2566 |
"format": "f32-to-bf16",
|
@@ -2570,8 +2570,8 @@
|
|
2570 |
{
|
2571 |
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
|
2572 |
"shape": [
|
2573 |
-
|
2574 |
-
|
2575 |
],
|
2576 |
"dtype": "float16",
|
2577 |
"format": "f32-to-bf16",
|
@@ -2591,8 +2591,8 @@
|
|
2591 |
{
|
2592 |
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
|
2593 |
"shape": [
|
2594 |
-
|
2595 |
-
|
2596 |
],
|
2597 |
"dtype": "uint32",
|
2598 |
"format": "f32-to-bf16",
|
@@ -2602,8 +2602,8 @@
|
|
2602 |
{
|
2603 |
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
|
2604 |
"shape": [
|
2605 |
-
|
2606 |
-
|
2607 |
],
|
2608 |
"dtype": "float16",
|
2609 |
"format": "f32-to-bf16",
|
@@ -2613,8 +2613,8 @@
|
|
2613 |
{
|
2614 |
"name": "model.layers.3.self_attn.o_proj.q_weight",
|
2615 |
"shape": [
|
2616 |
-
|
2617 |
-
|
2618 |
],
|
2619 |
"dtype": "uint32",
|
2620 |
"format": "f32-to-bf16",
|
@@ -2624,8 +2624,8 @@
|
|
2624 |
{
|
2625 |
"name": "model.layers.3.self_attn.o_proj.q_scale",
|
2626 |
"shape": [
|
2627 |
-
|
2628 |
-
|
2629 |
],
|
2630 |
"dtype": "float16",
|
2631 |
"format": "f32-to-bf16",
|
@@ -2645,8 +2645,8 @@
|
|
2645 |
{
|
2646 |
"name": "model.layers.4.mlp.down_proj.q_weight",
|
2647 |
"shape": [
|
2648 |
-
|
2649 |
-
|
2650 |
],
|
2651 |
"dtype": "uint32",
|
2652 |
"format": "f32-to-bf16",
|
@@ -2656,8 +2656,8 @@
|
|
2656 |
{
|
2657 |
"name": "model.layers.4.mlp.down_proj.q_scale",
|
2658 |
"shape": [
|
2659 |
-
|
2660 |
-
|
2661 |
],
|
2662 |
"dtype": "float16",
|
2663 |
"format": "f32-to-bf16",
|
@@ -2667,8 +2667,8 @@
|
|
2667 |
{
|
2668 |
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
|
2669 |
"shape": [
|
2670 |
-
|
2671 |
-
|
2672 |
],
|
2673 |
"dtype": "uint32",
|
2674 |
"format": "f32-to-bf16",
|
@@ -2678,8 +2678,8 @@
|
|
2678 |
{
|
2679 |
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
|
2680 |
"shape": [
|
2681 |
-
|
2682 |
-
|
2683 |
],
|
2684 |
"dtype": "float16",
|
2685 |
"format": "f32-to-bf16",
|
@@ -2699,8 +2699,8 @@
|
|
2699 |
{
|
2700 |
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
|
2701 |
"shape": [
|
2702 |
-
|
2703 |
-
|
2704 |
],
|
2705 |
"dtype": "uint32",
|
2706 |
"format": "f32-to-bf16",
|
@@ -2710,8 +2710,8 @@
|
|
2710 |
{
|
2711 |
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
|
2712 |
"shape": [
|
2713 |
-
|
2714 |
-
|
2715 |
],
|
2716 |
"dtype": "float16",
|
2717 |
"format": "f32-to-bf16",
|
@@ -2721,8 +2721,8 @@
|
|
2721 |
{
|
2722 |
"name": "model.layers.4.self_attn.o_proj.q_weight",
|
2723 |
"shape": [
|
2724 |
-
|
2725 |
-
|
2726 |
],
|
2727 |
"dtype": "uint32",
|
2728 |
"format": "f32-to-bf16",
|
@@ -2732,8 +2732,8 @@
|
|
2732 |
{
|
2733 |
"name": "model.layers.4.self_attn.o_proj.q_scale",
|
2734 |
"shape": [
|
2735 |
-
|
2736 |
-
|
2737 |
],
|
2738 |
"dtype": "float16",
|
2739 |
"format": "f32-to-bf16",
|
@@ -2753,8 +2753,8 @@
|
|
2753 |
{
|
2754 |
"name": "model.layers.5.mlp.down_proj.q_weight",
|
2755 |
"shape": [
|
2756 |
-
|
2757 |
-
|
2758 |
],
|
2759 |
"dtype": "uint32",
|
2760 |
"format": "f32-to-bf16",
|
@@ -2764,8 +2764,8 @@
|
|
2764 |
{
|
2765 |
"name": "model.layers.5.mlp.down_proj.q_scale",
|
2766 |
"shape": [
|
2767 |
-
|
2768 |
-
|
2769 |
],
|
2770 |
"dtype": "float16",
|
2771 |
"format": "f32-to-bf16",
|
@@ -2773,7 +2773,7 @@
|
|
2773 |
"byteOffset": 32828544
|
2774 |
}
|
2775 |
],
|
2776 |
-
"md5sum": "
|
2777 |
},
|
2778 |
{
|
2779 |
"dataPath": "params_shard_2.bin",
|
@@ -2783,8 +2783,8 @@
|
|
2783 |
{
|
2784 |
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
|
2785 |
"shape": [
|
2786 |
-
|
2787 |
-
|
2788 |
],
|
2789 |
"dtype": "uint32",
|
2790 |
"format": "f32-to-bf16",
|
@@ -2794,8 +2794,8 @@
|
|
2794 |
{
|
2795 |
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
|
2796 |
"shape": [
|
2797 |
-
|
2798 |
-
|
2799 |
],
|
2800 |
"dtype": "float16",
|
2801 |
"format": "f32-to-bf16",
|
@@ -2815,8 +2815,8 @@
|
|
2815 |
{
|
2816 |
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
|
2817 |
"shape": [
|
2818 |
-
|
2819 |
-
|
2820 |
],
|
2821 |
"dtype": "uint32",
|
2822 |
"format": "f32-to-bf16",
|
@@ -2826,8 +2826,8 @@
|
|
2826 |
{
|
2827 |
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
|
2828 |
"shape": [
|
2829 |
-
|
2830 |
-
|
2831 |
],
|
2832 |
"dtype": "float16",
|
2833 |
"format": "f32-to-bf16",
|
@@ -2837,8 +2837,8 @@
|
|
2837 |
{
|
2838 |
"name": "model.layers.5.self_attn.o_proj.q_weight",
|
2839 |
"shape": [
|
2840 |
-
|
2841 |
-
|
2842 |
],
|
2843 |
"dtype": "uint32",
|
2844 |
"format": "f32-to-bf16",
|
@@ -2848,8 +2848,8 @@
|
|
2848 |
{
|
2849 |
"name": "model.layers.5.self_attn.o_proj.q_scale",
|
2850 |
"shape": [
|
2851 |
-
|
2852 |
-
|
2853 |
],
|
2854 |
"dtype": "float16",
|
2855 |
"format": "f32-to-bf16",
|
@@ -2869,8 +2869,8 @@
|
|
2869 |
{
|
2870 |
"name": "model.layers.6.mlp.down_proj.q_weight",
|
2871 |
"shape": [
|
2872 |
-
|
2873 |
-
|
2874 |
],
|
2875 |
"dtype": "uint32",
|
2876 |
"format": "f32-to-bf16",
|
@@ -2880,8 +2880,8 @@
|
|
2880 |
{
|
2881 |
"name": "model.layers.6.mlp.down_proj.q_scale",
|
2882 |
"shape": [
|
2883 |
-
|
2884 |
-
|
2885 |
],
|
2886 |
"dtype": "float16",
|
2887 |
"format": "f32-to-bf16",
|
@@ -2891,8 +2891,8 @@
|
|
2891 |
{
|
2892 |
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
|
2893 |
"shape": [
|
2894 |
-
|
2895 |
-
|
2896 |
],
|
2897 |
"dtype": "uint32",
|
2898 |
"format": "f32-to-bf16",
|
@@ -2902,8 +2902,8 @@
|
|
2902 |
{
|
2903 |
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
|
2904 |
"shape": [
|
2905 |
-
|
2906 |
-
|
2907 |
],
|
2908 |
"dtype": "float16",
|
2909 |
"format": "f32-to-bf16",
|
@@ -2923,8 +2923,8 @@
|
|
2923 |
{
|
2924 |
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
|
2925 |
"shape": [
|
2926 |
-
|
2927 |
-
|
2928 |
],
|
2929 |
"dtype": "uint32",
|
2930 |
"format": "f32-to-bf16",
|
@@ -2934,8 +2934,8 @@
|
|
2934 |
{
|
2935 |
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
|
2936 |
"shape": [
|
2937 |
-
|
2938 |
-
|
2939 |
],
|
2940 |
"dtype": "float16",
|
2941 |
"format": "f32-to-bf16",
|
@@ -2945,8 +2945,8 @@
|
|
2945 |
{
|
2946 |
"name": "model.layers.6.self_attn.o_proj.q_weight",
|
2947 |
"shape": [
|
2948 |
-
|
2949 |
-
|
2950 |
],
|
2951 |
"dtype": "uint32",
|
2952 |
"format": "f32-to-bf16",
|
@@ -2956,8 +2956,8 @@
|
|
2956 |
{
|
2957 |
"name": "model.layers.6.self_attn.o_proj.q_scale",
|
2958 |
"shape": [
|
2959 |
-
|
2960 |
-
|
2961 |
],
|
2962 |
"dtype": "float16",
|
2963 |
"format": "f32-to-bf16",
|
@@ -2977,8 +2977,8 @@
|
|
2977 |
{
|
2978 |
"name": "model.layers.7.mlp.down_proj.q_weight",
|
2979 |
"shape": [
|
2980 |
-
|
2981 |
-
|
2982 |
],
|
2983 |
"dtype": "uint32",
|
2984 |
"format": "f32-to-bf16",
|
@@ -2988,8 +2988,8 @@
|
|
2988 |
{
|
2989 |
"name": "model.layers.7.mlp.down_proj.q_scale",
|
2990 |
"shape": [
|
2991 |
-
|
2992 |
-
|
2993 |
],
|
2994 |
"dtype": "float16",
|
2995 |
"format": "f32-to-bf16",
|
@@ -2999,8 +2999,8 @@
|
|
2999 |
{
|
3000 |
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
|
3001 |
"shape": [
|
3002 |
-
|
3003 |
-
|
3004 |
],
|
3005 |
"dtype": "uint32",
|
3006 |
"format": "f32-to-bf16",
|
@@ -3010,8 +3010,8 @@
|
|
3010 |
{
|
3011 |
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
|
3012 |
"shape": [
|
3013 |
-
|
3014 |
-
|
3015 |
],
|
3016 |
"dtype": "float16",
|
3017 |
"format": "f32-to-bf16",
|
@@ -3031,8 +3031,8 @@
|
|
3031 |
{
|
3032 |
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
|
3033 |
"shape": [
|
3034 |
-
|
3035 |
-
|
3036 |
],
|
3037 |
"dtype": "uint32",
|
3038 |
"format": "f32-to-bf16",
|
@@ -3042,8 +3042,8 @@
|
|
3042 |
{
|
3043 |
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
|
3044 |
"shape": [
|
3045 |
-
|
3046 |
-
|
3047 |
],
|
3048 |
"dtype": "float16",
|
3049 |
"format": "f32-to-bf16",
|
@@ -3053,8 +3053,8 @@
|
|
3053 |
{
|
3054 |
"name": "model.layers.7.self_attn.o_proj.q_weight",
|
3055 |
"shape": [
|
3056 |
-
|
3057 |
-
|
3058 |
],
|
3059 |
"dtype": "uint32",
|
3060 |
"format": "f32-to-bf16",
|
@@ -3064,8 +3064,8 @@
|
|
3064 |
{
|
3065 |
"name": "model.layers.7.self_attn.o_proj.q_scale",
|
3066 |
"shape": [
|
3067 |
-
|
3068 |
-
|
3069 |
],
|
3070 |
"dtype": "float16",
|
3071 |
"format": "f32-to-bf16",
|
@@ -3085,8 +3085,8 @@
|
|
3085 |
{
|
3086 |
"name": "model.layers.8.mlp.down_proj.q_weight",
|
3087 |
"shape": [
|
3088 |
-
|
3089 |
-
|
3090 |
],
|
3091 |
"dtype": "uint32",
|
3092 |
"format": "f32-to-bf16",
|
@@ -3096,8 +3096,8 @@
|
|
3096 |
{
|
3097 |
"name": "model.layers.8.mlp.down_proj.q_scale",
|
3098 |
"shape": [
|
3099 |
-
|
3100 |
-
|
3101 |
],
|
3102 |
"dtype": "float16",
|
3103 |
"format": "f32-to-bf16",
|
@@ -3107,8 +3107,8 @@
|
|
3107 |
{
|
3108 |
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
|
3109 |
"shape": [
|
3110 |
-
|
3111 |
-
|
3112 |
],
|
3113 |
"dtype": "uint32",
|
3114 |
"format": "f32-to-bf16",
|
@@ -3118,8 +3118,8 @@
|
|
3118 |
{
|
3119 |
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
|
3120 |
"shape": [
|
3121 |
-
|
3122 |
-
|
3123 |
],
|
3124 |
"dtype": "float16",
|
3125 |
"format": "f32-to-bf16",
|
@@ -3139,8 +3139,8 @@
|
|
3139 |
{
|
3140 |
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
|
3141 |
"shape": [
|
3142 |
-
|
3143 |
-
|
3144 |
],
|
3145 |
"dtype": "uint32",
|
3146 |
"format": "f32-to-bf16",
|
@@ -3150,8 +3150,8 @@
|
|
3150 |
{
|
3151 |
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
|
3152 |
"shape": [
|
3153 |
-
|
3154 |
-
|
3155 |
],
|
3156 |
"dtype": "float16",
|
3157 |
"format": "f32-to-bf16",
|
@@ -3161,8 +3161,8 @@
|
|
3161 |
{
|
3162 |
"name": "model.layers.8.self_attn.o_proj.q_weight",
|
3163 |
"shape": [
|
3164 |
-
|
3165 |
-
|
3166 |
],
|
3167 |
"dtype": "uint32",
|
3168 |
"format": "f32-to-bf16",
|
@@ -3172,8 +3172,8 @@
|
|
3172 |
{
|
3173 |
"name": "model.layers.8.self_attn.o_proj.q_scale",
|
3174 |
"shape": [
|
3175 |
-
|
3176 |
-
|
3177 |
],
|
3178 |
"dtype": "float16",
|
3179 |
"format": "f32-to-bf16",
|
@@ -3193,8 +3193,8 @@
|
|
3193 |
{
|
3194 |
"name": "model.layers.9.mlp.down_proj.q_weight",
|
3195 |
"shape": [
|
3196 |
-
|
3197 |
-
|
3198 |
],
|
3199 |
"dtype": "uint32",
|
3200 |
"format": "f32-to-bf16",
|
@@ -3204,8 +3204,8 @@
|
|
3204 |
{
|
3205 |
"name": "model.layers.9.mlp.down_proj.q_scale",
|
3206 |
"shape": [
|
3207 |
-
|
3208 |
-
|
3209 |
],
|
3210 |
"dtype": "float16",
|
3211 |
"format": "f32-to-bf16",
|
@@ -3215,8 +3215,8 @@
|
|
3215 |
{
|
3216 |
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
|
3217 |
"shape": [
|
3218 |
-
|
3219 |
-
|
3220 |
],
|
3221 |
"dtype": "uint32",
|
3222 |
"format": "f32-to-bf16",
|
@@ -3226,8 +3226,8 @@
|
|
3226 |
{
|
3227 |
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
|
3228 |
"shape": [
|
3229 |
-
|
3230 |
-
|
3231 |
],
|
3232 |
"dtype": "float16",
|
3233 |
"format": "f32-to-bf16",
|
@@ -3247,8 +3247,8 @@
|
|
3247 |
{
|
3248 |
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
|
3249 |
"shape": [
|
3250 |
-
|
3251 |
-
|
3252 |
],
|
3253 |
"dtype": "uint32",
|
3254 |
"format": "f32-to-bf16",
|
@@ -3258,8 +3258,8 @@
|
|
3258 |
{
|
3259 |
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
|
3260 |
"shape": [
|
3261 |
-
|
3262 |
-
|
3263 |
],
|
3264 |
"dtype": "float16",
|
3265 |
"format": "f32-to-bf16",
|
@@ -3269,8 +3269,8 @@
|
|
3269 |
{
|
3270 |
"name": "model.layers.9.self_attn.o_proj.q_weight",
|
3271 |
"shape": [
|
3272 |
-
|
3273 |
-
|
3274 |
],
|
3275 |
"dtype": "uint32",
|
3276 |
"format": "f32-to-bf16",
|
@@ -3280,8 +3280,8 @@
|
|
3280 |
{
|
3281 |
"name": "model.layers.9.self_attn.o_proj.q_scale",
|
3282 |
"shape": [
|
3283 |
-
|
3284 |
-
|
3285 |
],
|
3286 |
"dtype": "float16",
|
3287 |
"format": "f32-to-bf16",
|
@@ -3299,7 +3299,7 @@
|
|
3299 |
"byteOffset": 9465984
|
3300 |
}
|
3301 |
],
|
3302 |
-
"md5sum": "
|
3303 |
}
|
3304 |
]
|
3305 |
}
|
|
|
45 |
{
|
46 |
"name": "model.layers.0.mlp.down_proj.q_weight",
|
47 |
"shape": [
|
48 |
+
576,
|
49 |
+
192
|
50 |
],
|
51 |
"dtype": "uint32",
|
52 |
"format": "f32-to-bf16",
|
|
|
56 |
{
|
57 |
"name": "model.layers.0.mlp.down_proj.q_scale",
|
58 |
"shape": [
|
59 |
+
576,
|
60 |
+
48
|
61 |
],
|
62 |
"dtype": "float16",
|
63 |
"format": "f32-to-bf16",
|
|
|
67 |
{
|
68 |
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
|
69 |
"shape": [
|
70 |
+
3072,
|
71 |
+
72
|
72 |
],
|
73 |
"dtype": "uint32",
|
74 |
"format": "f32-to-bf16",
|
|
|
78 |
{
|
79 |
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
|
80 |
"shape": [
|
81 |
+
3072,
|
82 |
+
18
|
83 |
],
|
84 |
"dtype": "float16",
|
85 |
"format": "f32-to-bf16",
|
|
|
99 |
{
|
100 |
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
|
101 |
"shape": [
|
102 |
+
960,
|
103 |
+
72
|
104 |
],
|
105 |
"dtype": "uint32",
|
106 |
"format": "f32-to-bf16",
|
|
|
110 |
{
|
111 |
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
|
112 |
"shape": [
|
113 |
+
960,
|
114 |
+
18
|
115 |
],
|
116 |
"dtype": "float16",
|
117 |
"format": "f32-to-bf16",
|
|
|
121 |
{
|
122 |
"name": "model.layers.0.self_attn.o_proj.q_weight",
|
123 |
"shape": [
|
124 |
+
576,
|
125 |
+
72
|
126 |
],
|
127 |
"dtype": "uint32",
|
128 |
"format": "f32-to-bf16",
|
|
|
132 |
{
|
133 |
"name": "model.layers.0.self_attn.o_proj.q_scale",
|
134 |
"shape": [
|
135 |
+
576,
|
136 |
+
18
|
137 |
],
|
138 |
"dtype": "float16",
|
139 |
"format": "f32-to-bf16",
|
|
|
153 |
{
|
154 |
"name": "model.layers.1.mlp.down_proj.q_weight",
|
155 |
"shape": [
|
156 |
+
576,
|
157 |
+
192
|
158 |
],
|
159 |
"dtype": "uint32",
|
160 |
"format": "f32-to-bf16",
|
|
|
164 |
{
|
165 |
"name": "model.layers.1.mlp.down_proj.q_scale",
|
166 |
"shape": [
|
167 |
+
576,
|
168 |
+
48
|
169 |
],
|
170 |
"dtype": "float16",
|
171 |
"format": "f32-to-bf16",
|
|
|
175 |
{
|
176 |
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
|
177 |
"shape": [
|
178 |
+
3072,
|
179 |
+
72
|
180 |
],
|
181 |
"dtype": "uint32",
|
182 |
"format": "f32-to-bf16",
|
|
|
186 |
{
|
187 |
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
|
188 |
"shape": [
|
189 |
+
3072,
|
190 |
+
18
|
191 |
],
|
192 |
"dtype": "float16",
|
193 |
"format": "f32-to-bf16",
|
|
|
207 |
{
|
208 |
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
|
209 |
"shape": [
|
210 |
+
960,
|
211 |
+
72
|
212 |
],
|
213 |
"dtype": "uint32",
|
214 |
"format": "f32-to-bf16",
|
|
|
218 |
{
|
219 |
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
|
220 |
"shape": [
|
221 |
+
960,
|
222 |
+
18
|
223 |
],
|
224 |
"dtype": "float16",
|
225 |
"format": "f32-to-bf16",
|
|
|
229 |
{
|
230 |
"name": "model.layers.1.self_attn.o_proj.q_weight",
|
231 |
"shape": [
|
232 |
+
576,
|
233 |
+
72
|
234 |
],
|
235 |
"dtype": "uint32",
|
236 |
"format": "f32-to-bf16",
|
|
|
240 |
{
|
241 |
"name": "model.layers.1.self_attn.o_proj.q_scale",
|
242 |
"shape": [
|
243 |
+
576,
|
244 |
+
18
|
245 |
],
|
246 |
"dtype": "float16",
|
247 |
"format": "f32-to-bf16",
|
|
|
261 |
{
|
262 |
"name": "model.layers.10.mlp.down_proj.q_weight",
|
263 |
"shape": [
|
264 |
+
576,
|
265 |
+
192
|
266 |
],
|
267 |
"dtype": "uint32",
|
268 |
"format": "f32-to-bf16",
|
|
|
272 |
{
|
273 |
"name": "model.layers.10.mlp.down_proj.q_scale",
|
274 |
"shape": [
|
275 |
+
576,
|
276 |
+
48
|
277 |
],
|
278 |
"dtype": "float16",
|
279 |
"format": "f32-to-bf16",
|
|
|
283 |
{
|
284 |
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
|
285 |
"shape": [
|
286 |
+
3072,
|
287 |
+
72
|
288 |
],
|
289 |
"dtype": "uint32",
|
290 |
"format": "f32-to-bf16",
|
|
|
294 |
{
|
295 |
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
|
296 |
"shape": [
|
297 |
+
3072,
|
298 |
+
18
|
299 |
],
|
300 |
"dtype": "float16",
|
301 |
"format": "f32-to-bf16",
|
|
|
315 |
{
|
316 |
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
|
317 |
"shape": [
|
318 |
+
960,
|
319 |
+
72
|
320 |
],
|
321 |
"dtype": "uint32",
|
322 |
"format": "f32-to-bf16",
|
|
|
326 |
{
|
327 |
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
|
328 |
"shape": [
|
329 |
+
960,
|
330 |
+
18
|
331 |
],
|
332 |
"dtype": "float16",
|
333 |
"format": "f32-to-bf16",
|
|
|
337 |
{
|
338 |
"name": "model.layers.10.self_attn.o_proj.q_weight",
|
339 |
"shape": [
|
340 |
+
576,
|
341 |
+
72
|
342 |
],
|
343 |
"dtype": "uint32",
|
344 |
"format": "f32-to-bf16",
|
|
|
348 |
{
|
349 |
"name": "model.layers.10.self_attn.o_proj.q_scale",
|
350 |
"shape": [
|
351 |
+
576,
|
352 |
+
18
|
353 |
],
|
354 |
"dtype": "float16",
|
355 |
"format": "f32-to-bf16",
|
|
|
369 |
{
|
370 |
"name": "model.layers.11.mlp.down_proj.q_weight",
|
371 |
"shape": [
|
372 |
+
576,
|
373 |
+
192
|
374 |
],
|
375 |
"dtype": "uint32",
|
376 |
"format": "f32-to-bf16",
|
|
|
380 |
{
|
381 |
"name": "model.layers.11.mlp.down_proj.q_scale",
|
382 |
"shape": [
|
383 |
+
576,
|
384 |
+
48
|
385 |
],
|
386 |
"dtype": "float16",
|
387 |
"format": "f32-to-bf16",
|
|
|
391 |
{
|
392 |
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
|
393 |
"shape": [
|
394 |
+
3072,
|
395 |
+
72
|
396 |
],
|
397 |
"dtype": "uint32",
|
398 |
"format": "f32-to-bf16",
|
|
|
402 |
{
|
403 |
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
|
404 |
"shape": [
|
405 |
+
3072,
|
406 |
+
18
|
407 |
],
|
408 |
"dtype": "float16",
|
409 |
"format": "f32-to-bf16",
|
|
|
423 |
{
|
424 |
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
|
425 |
"shape": [
|
426 |
+
960,
|
427 |
+
72
|
428 |
],
|
429 |
"dtype": "uint32",
|
430 |
"format": "f32-to-bf16",
|
|
|
434 |
{
|
435 |
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
|
436 |
"shape": [
|
437 |
+
960,
|
438 |
+
18
|
439 |
],
|
440 |
"dtype": "float16",
|
441 |
"format": "f32-to-bf16",
|
|
|
445 |
{
|
446 |
"name": "model.layers.11.self_attn.o_proj.q_weight",
|
447 |
"shape": [
|
448 |
+
576,
|
449 |
+
72
|
450 |
],
|
451 |
"dtype": "uint32",
|
452 |
"format": "f32-to-bf16",
|
|
|
456 |
{
|
457 |
"name": "model.layers.11.self_attn.o_proj.q_scale",
|
458 |
"shape": [
|
459 |
+
576,
|
460 |
+
18
|
461 |
],
|
462 |
"dtype": "float16",
|
463 |
"format": "f32-to-bf16",
|
|
|
477 |
{
|
478 |
"name": "model.layers.12.mlp.down_proj.q_weight",
|
479 |
"shape": [
|
480 |
+
576,
|
481 |
+
192
|
482 |
],
|
483 |
"dtype": "uint32",
|
484 |
"format": "f32-to-bf16",
|
|
|
488 |
{
|
489 |
"name": "model.layers.12.mlp.down_proj.q_scale",
|
490 |
"shape": [
|
491 |
+
576,
|
492 |
+
48
|
493 |
],
|
494 |
"dtype": "float16",
|
495 |
"format": "f32-to-bf16",
|
|
|
499 |
{
|
500 |
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
|
501 |
"shape": [
|
502 |
+
3072,
|
503 |
+
72
|
504 |
],
|
505 |
"dtype": "uint32",
|
506 |
"format": "f32-to-bf16",
|
|
|
510 |
{
|
511 |
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
|
512 |
"shape": [
|
513 |
+
3072,
|
514 |
+
18
|
515 |
],
|
516 |
"dtype": "float16",
|
517 |
"format": "f32-to-bf16",
|
|
|
531 |
{
|
532 |
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
|
533 |
"shape": [
|
534 |
+
960,
|
535 |
+
72
|
536 |
],
|
537 |
"dtype": "uint32",
|
538 |
"format": "f32-to-bf16",
|
|
|
542 |
{
|
543 |
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
|
544 |
"shape": [
|
545 |
+
960,
|
546 |
+
18
|
547 |
],
|
548 |
"dtype": "float16",
|
549 |
"format": "f32-to-bf16",
|
|
|
553 |
{
|
554 |
"name": "model.layers.12.self_attn.o_proj.q_weight",
|
555 |
"shape": [
|
556 |
+
576,
|
557 |
+
72
|
558 |
],
|
559 |
"dtype": "uint32",
|
560 |
"format": "f32-to-bf16",
|
|
|
564 |
{
|
565 |
"name": "model.layers.12.self_attn.o_proj.q_scale",
|
566 |
"shape": [
|
567 |
+
576,
|
568 |
+
18
|
569 |
],
|
570 |
"dtype": "float16",
|
571 |
"format": "f32-to-bf16",
|
|
|
585 |
{
|
586 |
"name": "model.layers.13.mlp.down_proj.q_weight",
|
587 |
"shape": [
|
588 |
+
576,
|
589 |
+
192
|
590 |
],
|
591 |
"dtype": "uint32",
|
592 |
"format": "f32-to-bf16",
|
|
|
596 |
{
|
597 |
"name": "model.layers.13.mlp.down_proj.q_scale",
|
598 |
"shape": [
|
599 |
+
576,
|
600 |
+
48
|
601 |
],
|
602 |
"dtype": "float16",
|
603 |
"format": "f32-to-bf16",
|
|
|
607 |
{
|
608 |
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
|
609 |
"shape": [
|
610 |
+
3072,
|
611 |
+
72
|
612 |
],
|
613 |
"dtype": "uint32",
|
614 |
"format": "f32-to-bf16",
|
|
|
618 |
{
|
619 |
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
|
620 |
"shape": [
|
621 |
+
3072,
|
622 |
+
18
|
623 |
],
|
624 |
"dtype": "float16",
|
625 |
"format": "f32-to-bf16",
|
|
|
639 |
{
|
640 |
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
|
641 |
"shape": [
|
642 |
+
960,
|
643 |
+
72
|
644 |
],
|
645 |
"dtype": "uint32",
|
646 |
"format": "f32-to-bf16",
|
|
|
650 |
{
|
651 |
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
|
652 |
"shape": [
|
653 |
+
960,
|
654 |
+
18
|
655 |
],
|
656 |
"dtype": "float16",
|
657 |
"format": "f32-to-bf16",
|
|
|
661 |
{
|
662 |
"name": "model.layers.13.self_attn.o_proj.q_weight",
|
663 |
"shape": [
|
664 |
+
576,
|
665 |
+
72
|
666 |
],
|
667 |
"dtype": "uint32",
|
668 |
"format": "f32-to-bf16",
|
|
|
672 |
{
|
673 |
"name": "model.layers.13.self_attn.o_proj.q_scale",
|
674 |
"shape": [
|
675 |
+
576,
|
676 |
+
18
|
677 |
],
|
678 |
"dtype": "float16",
|
679 |
"format": "f32-to-bf16",
|
|
|
693 |
{
|
694 |
"name": "model.layers.14.mlp.down_proj.q_weight",
|
695 |
"shape": [
|
696 |
+
576,
|
697 |
+
192
|
698 |
],
|
699 |
"dtype": "uint32",
|
700 |
"format": "f32-to-bf16",
|
|
|
704 |
{
|
705 |
"name": "model.layers.14.mlp.down_proj.q_scale",
|
706 |
"shape": [
|
707 |
+
576,
|
708 |
+
48
|
709 |
],
|
710 |
"dtype": "float16",
|
711 |
"format": "f32-to-bf16",
|
|
|
715 |
{
|
716 |
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
|
717 |
"shape": [
|
718 |
+
3072,
|
719 |
+
72
|
720 |
],
|
721 |
"dtype": "uint32",
|
722 |
"format": "f32-to-bf16",
|
|
|
726 |
{
|
727 |
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
|
728 |
"shape": [
|
729 |
+
3072,
|
730 |
+
18
|
731 |
],
|
732 |
"dtype": "float16",
|
733 |
"format": "f32-to-bf16",
|
|
|
747 |
{
|
748 |
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
|
749 |
"shape": [
|
750 |
+
960,
|
751 |
+
72
|
752 |
],
|
753 |
"dtype": "uint32",
|
754 |
"format": "f32-to-bf16",
|
|
|
758 |
{
|
759 |
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
|
760 |
"shape": [
|
761 |
+
960,
|
762 |
+
18
|
763 |
],
|
764 |
"dtype": "float16",
|
765 |
"format": "f32-to-bf16",
|
|
|
769 |
{
|
770 |
"name": "model.layers.14.self_attn.o_proj.q_weight",
|
771 |
"shape": [
|
772 |
+
576,
|
773 |
+
72
|
774 |
],
|
775 |
"dtype": "uint32",
|
776 |
"format": "f32-to-bf16",
|
|
|
780 |
{
|
781 |
"name": "model.layers.14.self_attn.o_proj.q_scale",
|
782 |
"shape": [
|
783 |
+
576,
|
784 |
+
18
|
785 |
],
|
786 |
"dtype": "float16",
|
787 |
"format": "f32-to-bf16",
|
|
|
801 |
{
|
802 |
"name": "model.layers.15.mlp.down_proj.q_weight",
|
803 |
"shape": [
|
804 |
+
576,
|
805 |
+
192
|
806 |
],
|
807 |
"dtype": "uint32",
|
808 |
"format": "f32-to-bf16",
|
|
|
812 |
{
|
813 |
"name": "model.layers.15.mlp.down_proj.q_scale",
|
814 |
"shape": [
|
815 |
+
576,
|
816 |
+
48
|
817 |
],
|
818 |
"dtype": "float16",
|
819 |
"format": "f32-to-bf16",
|
|
|
823 |
{
|
824 |
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
|
825 |
"shape": [
|
826 |
+
3072,
|
827 |
+
72
|
828 |
],
|
829 |
"dtype": "uint32",
|
830 |
"format": "f32-to-bf16",
|
|
|
834 |
{
|
835 |
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
|
836 |
"shape": [
|
837 |
+
3072,
|
838 |
+
18
|
839 |
],
|
840 |
"dtype": "float16",
|
841 |
"format": "f32-to-bf16",
|
|
|
855 |
{
|
856 |
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
|
857 |
"shape": [
|
858 |
+
960,
|
859 |
+
72
|
860 |
],
|
861 |
"dtype": "uint32",
|
862 |
"format": "f32-to-bf16",
|
|
|
866 |
{
|
867 |
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
|
868 |
"shape": [
|
869 |
+
960,
|
870 |
+
18
|
871 |
],
|
872 |
"dtype": "float16",
|
873 |
"format": "f32-to-bf16",
|
|
|
877 |
{
|
878 |
"name": "model.layers.15.self_attn.o_proj.q_weight",
|
879 |
"shape": [
|
880 |
+
576,
|
881 |
+
72
|
882 |
],
|
883 |
"dtype": "uint32",
|
884 |
"format": "f32-to-bf16",
|
|
|
888 |
{
|
889 |
"name": "model.layers.15.self_attn.o_proj.q_scale",
|
890 |
"shape": [
|
891 |
+
576,
|
892 |
+
18
|
893 |
],
|
894 |
"dtype": "float16",
|
895 |
"format": "f32-to-bf16",
|
|
|
909 |
{
|
910 |
"name": "model.layers.16.mlp.down_proj.q_weight",
|
911 |
"shape": [
|
912 |
+
576,
|
913 |
+
192
|
914 |
],
|
915 |
"dtype": "uint32",
|
916 |
"format": "f32-to-bf16",
|
|
|
920 |
{
|
921 |
"name": "model.layers.16.mlp.down_proj.q_scale",
|
922 |
"shape": [
|
923 |
+
576,
|
924 |
+
48
|
925 |
],
|
926 |
"dtype": "float16",
|
927 |
"format": "f32-to-bf16",
|
|
|
931 |
{
|
932 |
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
|
933 |
"shape": [
|
934 |
+
3072,
|
935 |
+
72
|
936 |
],
|
937 |
"dtype": "uint32",
|
938 |
"format": "f32-to-bf16",
|
|
|
942 |
{
|
943 |
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
|
944 |
"shape": [
|
945 |
+
3072,
|
946 |
+
18
|
947 |
],
|
948 |
"dtype": "float16",
|
949 |
"format": "f32-to-bf16",
|
|
|
961 |
"byteOffset": 33363396
|
962 |
}
|
963 |
],
|
964 |
+
"md5sum": "28cb4b478cefb107df66c43c177153b6"
|
965 |
},
|
966 |
{
|
967 |
"dataPath": "params_shard_1.bin",
|
|
|
971 |
{
|
972 |
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
|
973 |
"shape": [
|
974 |
+
960,
|
975 |
+
72
|
976 |
],
|
977 |
"dtype": "uint32",
|
978 |
"format": "f32-to-bf16",
|
|
|
982 |
{
|
983 |
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
|
984 |
"shape": [
|
985 |
+
960,
|
986 |
+
18
|
987 |
],
|
988 |
"dtype": "float16",
|
989 |
"format": "f32-to-bf16",
|
|
|
993 |
{
|
994 |
"name": "model.layers.16.self_attn.o_proj.q_weight",
|
995 |
"shape": [
|
996 |
+
576,
|
997 |
+
72
|
998 |
],
|
999 |
"dtype": "uint32",
|
1000 |
"format": "f32-to-bf16",
|
|
|
1004 |
{
|
1005 |
"name": "model.layers.16.self_attn.o_proj.q_scale",
|
1006 |
"shape": [
|
1007 |
+
576,
|
1008 |
+
18
|
1009 |
],
|
1010 |
"dtype": "float16",
|
1011 |
"format": "f32-to-bf16",
|
|
|
1025 |
{
|
1026 |
"name": "model.layers.17.mlp.down_proj.q_weight",
|
1027 |
"shape": [
|
1028 |
+
576,
|
1029 |
+
192
|
1030 |
],
|
1031 |
"dtype": "uint32",
|
1032 |
"format": "f32-to-bf16",
|
|
|
1036 |
{
|
1037 |
"name": "model.layers.17.mlp.down_proj.q_scale",
|
1038 |
"shape": [
|
1039 |
+
576,
|
1040 |
+
48
|
1041 |
],
|
1042 |
"dtype": "float16",
|
1043 |
"format": "f32-to-bf16",
|
|
|
1047 |
{
|
1048 |
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
|
1049 |
"shape": [
|
1050 |
+
3072,
|
1051 |
+
72
|
1052 |
],
|
1053 |
"dtype": "uint32",
|
1054 |
"format": "f32-to-bf16",
|
|
|
1058 |
{
|
1059 |
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
|
1060 |
"shape": [
|
1061 |
+
3072,
|
1062 |
+
18
|
1063 |
],
|
1064 |
"dtype": "float16",
|
1065 |
"format": "f32-to-bf16",
|
|
|
1079 |
{
|
1080 |
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
|
1081 |
"shape": [
|
1082 |
+
960,
|
1083 |
+
72
|
1084 |
],
|
1085 |
"dtype": "uint32",
|
1086 |
"format": "f32-to-bf16",
|
|
|
1090 |
{
|
1091 |
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
|
1092 |
"shape": [
|
1093 |
+
960,
|
1094 |
+
18
|
1095 |
],
|
1096 |
"dtype": "float16",
|
1097 |
"format": "f32-to-bf16",
|
|
|
1101 |
{
|
1102 |
"name": "model.layers.17.self_attn.o_proj.q_weight",
|
1103 |
"shape": [
|
1104 |
+
576,
|
1105 |
+
72
|
1106 |
],
|
1107 |
"dtype": "uint32",
|
1108 |
"format": "f32-to-bf16",
|
|
|
1112 |
{
|
1113 |
"name": "model.layers.17.self_attn.o_proj.q_scale",
|
1114 |
"shape": [
|
1115 |
+
576,
|
1116 |
+
18
|
1117 |
],
|
1118 |
"dtype": "float16",
|
1119 |
"format": "f32-to-bf16",
|
|
|
1133 |
{
|
1134 |
"name": "model.layers.18.mlp.down_proj.q_weight",
|
1135 |
"shape": [
|
1136 |
+
576,
|
1137 |
+
192
|
1138 |
],
|
1139 |
"dtype": "uint32",
|
1140 |
"format": "f32-to-bf16",
|
|
|
1144 |
{
|
1145 |
"name": "model.layers.18.mlp.down_proj.q_scale",
|
1146 |
"shape": [
|
1147 |
+
576,
|
1148 |
+
48
|
1149 |
],
|
1150 |
"dtype": "float16",
|
1151 |
"format": "f32-to-bf16",
|
|
|
1155 |
{
|
1156 |
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
|
1157 |
"shape": [
|
1158 |
+
3072,
|
1159 |
+
72
|
1160 |
],
|
1161 |
"dtype": "uint32",
|
1162 |
"format": "f32-to-bf16",
|
|
|
1166 |
{
|
1167 |
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
|
1168 |
"shape": [
|
1169 |
+
3072,
|
1170 |
+
18
|
1171 |
],
|
1172 |
"dtype": "float16",
|
1173 |
"format": "f32-to-bf16",
|
|
|
1187 |
{
|
1188 |
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
|
1189 |
"shape": [
|
1190 |
+
960,
|
1191 |
+
72
|
1192 |
],
|
1193 |
"dtype": "uint32",
|
1194 |
"format": "f32-to-bf16",
|
|
|
1198 |
{
|
1199 |
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
|
1200 |
"shape": [
|
1201 |
+
960,
|
1202 |
+
18
|
1203 |
],
|
1204 |
"dtype": "float16",
|
1205 |
"format": "f32-to-bf16",
|
|
|
1209 |
{
|
1210 |
"name": "model.layers.18.self_attn.o_proj.q_weight",
|
1211 |
"shape": [
|
1212 |
+
576,
|
1213 |
+
72
|
1214 |
],
|
1215 |
"dtype": "uint32",
|
1216 |
"format": "f32-to-bf16",
|
|
|
1220 |
{
|
1221 |
"name": "model.layers.18.self_attn.o_proj.q_scale",
|
1222 |
"shape": [
|
1223 |
+
576,
|
1224 |
+
18
|
1225 |
],
|
1226 |
"dtype": "float16",
|
1227 |
"format": "f32-to-bf16",
|
|
|
1241 |
{
|
1242 |
"name": "model.layers.19.mlp.down_proj.q_weight",
|
1243 |
"shape": [
|
1244 |
+
576,
|
1245 |
+
192
|
1246 |
],
|
1247 |
"dtype": "uint32",
|
1248 |
"format": "f32-to-bf16",
|
|
|
1252 |
{
|
1253 |
"name": "model.layers.19.mlp.down_proj.q_scale",
|
1254 |
"shape": [
|
1255 |
+
576,
|
1256 |
+
48
|
1257 |
],
|
1258 |
"dtype": "float16",
|
1259 |
"format": "f32-to-bf16",
|
|
|
1263 |
{
|
1264 |
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
|
1265 |
"shape": [
|
1266 |
+
3072,
|
1267 |
+
72
|
1268 |
],
|
1269 |
"dtype": "uint32",
|
1270 |
"format": "f32-to-bf16",
|
|
|
1274 |
{
|
1275 |
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
|
1276 |
"shape": [
|
1277 |
+
3072,
|
1278 |
+
18
|
1279 |
],
|
1280 |
"dtype": "float16",
|
1281 |
"format": "f32-to-bf16",
|
|
|
1295 |
{
|
1296 |
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
|
1297 |
"shape": [
|
1298 |
+
960,
|
1299 |
+
72
|
1300 |
],
|
1301 |
"dtype": "uint32",
|
1302 |
"format": "f32-to-bf16",
|
|
|
1306 |
{
|
1307 |
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
|
1308 |
"shape": [
|
1309 |
+
960,
|
1310 |
+
18
|
1311 |
],
|
1312 |
"dtype": "float16",
|
1313 |
"format": "f32-to-bf16",
|
|
|
1317 |
{
|
1318 |
"name": "model.layers.19.self_attn.o_proj.q_weight",
|
1319 |
"shape": [
|
1320 |
+
576,
|
1321 |
+
72
|
1322 |
],
|
1323 |
"dtype": "uint32",
|
1324 |
"format": "f32-to-bf16",
|
|
|
1328 |
{
|
1329 |
"name": "model.layers.19.self_attn.o_proj.q_scale",
|
1330 |
"shape": [
|
1331 |
+
576,
|
1332 |
+
18
|
1333 |
],
|
1334 |
"dtype": "float16",
|
1335 |
"format": "f32-to-bf16",
|
|
|
1349 |
{
|
1350 |
"name": "model.layers.2.mlp.down_proj.q_weight",
|
1351 |
"shape": [
|
1352 |
+
576,
|
1353 |
+
192
|
1354 |
],
|
1355 |
"dtype": "uint32",
|
1356 |
"format": "f32-to-bf16",
|
|
|
1360 |
{
|
1361 |
"name": "model.layers.2.mlp.down_proj.q_scale",
|
1362 |
"shape": [
|
1363 |
+
576,
|
1364 |
+
48
|
1365 |
],
|
1366 |
"dtype": "float16",
|
1367 |
"format": "f32-to-bf16",
|
|
|
1371 |
{
|
1372 |
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
|
1373 |
"shape": [
|
1374 |
+
3072,
|
1375 |
+
72
|
1376 |
],
|
1377 |
"dtype": "uint32",
|
1378 |
"format": "f32-to-bf16",
|
|
|
1382 |
{
|
1383 |
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
|
1384 |
"shape": [
|
1385 |
+
3072,
|
1386 |
+
18
|
1387 |
],
|
1388 |
"dtype": "float16",
|
1389 |
"format": "f32-to-bf16",
|
|
|
1403 |
{
|
1404 |
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
|
1405 |
"shape": [
|
1406 |
+
960,
|
1407 |
+
72
|
1408 |
],
|
1409 |
"dtype": "uint32",
|
1410 |
"format": "f32-to-bf16",
|
|
|
1414 |
{
|
1415 |
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
|
1416 |
"shape": [
|
1417 |
+
960,
|
1418 |
+
18
|
1419 |
],
|
1420 |
"dtype": "float16",
|
1421 |
"format": "f32-to-bf16",
|
|
|
1425 |
{
|
1426 |
"name": "model.layers.2.self_attn.o_proj.q_weight",
|
1427 |
"shape": [
|
1428 |
+
576,
|
1429 |
+
72
|
1430 |
],
|
1431 |
"dtype": "uint32",
|
1432 |
"format": "f32-to-bf16",
|
|
|
1436 |
{
|
1437 |
"name": "model.layers.2.self_attn.o_proj.q_scale",
|
1438 |
"shape": [
|
1439 |
+
576,
|
1440 |
+
18
|
1441 |
],
|
1442 |
"dtype": "float16",
|
1443 |
"format": "f32-to-bf16",
|
|
|
1457 |
{
|
1458 |
"name": "model.layers.20.mlp.down_proj.q_weight",
|
1459 |
"shape": [
|
1460 |
+
576,
|
1461 |
+
192
|
1462 |
],
|
1463 |
"dtype": "uint32",
|
1464 |
"format": "f32-to-bf16",
|
|
|
1468 |
{
|
1469 |
"name": "model.layers.20.mlp.down_proj.q_scale",
|
1470 |
"shape": [
|
1471 |
+
576,
|
1472 |
+
48
|
1473 |
],
|
1474 |
"dtype": "float16",
|
1475 |
"format": "f32-to-bf16",
|
|
|
1479 |
{
|
1480 |
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
|
1481 |
"shape": [
|
1482 |
+
3072,
|
1483 |
+
72
|
1484 |
],
|
1485 |
"dtype": "uint32",
|
1486 |
"format": "f32-to-bf16",
|
|
|
1490 |
{
|
1491 |
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
|
1492 |
"shape": [
|
1493 |
+
3072,
|
1494 |
+
18
|
1495 |
],
|
1496 |
"dtype": "float16",
|
1497 |
"format": "f32-to-bf16",
|
|
|
1511 |
{
|
1512 |
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
|
1513 |
"shape": [
|
1514 |
+
960,
|
1515 |
+
72
|
1516 |
],
|
1517 |
"dtype": "uint32",
|
1518 |
"format": "f32-to-bf16",
|
|
|
1522 |
{
|
1523 |
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
|
1524 |
"shape": [
|
1525 |
+
960,
|
1526 |
+
18
|
1527 |
],
|
1528 |
"dtype": "float16",
|
1529 |
"format": "f32-to-bf16",
|
|
|
1533 |
{
|
1534 |
"name": "model.layers.20.self_attn.o_proj.q_weight",
|
1535 |
"shape": [
|
1536 |
+
576,
|
1537 |
+
72
|
1538 |
],
|
1539 |
"dtype": "uint32",
|
1540 |
"format": "f32-to-bf16",
|
|
|
1544 |
{
|
1545 |
"name": "model.layers.20.self_attn.o_proj.q_scale",
|
1546 |
"shape": [
|
1547 |
+
576,
|
1548 |
+
18
|
1549 |
],
|
1550 |
"dtype": "float16",
|
1551 |
"format": "f32-to-bf16",
|
|
|
1565 |
{
|
1566 |
"name": "model.layers.21.mlp.down_proj.q_weight",
|
1567 |
"shape": [
|
1568 |
+
576,
|
1569 |
+
192
|
1570 |
],
|
1571 |
"dtype": "uint32",
|
1572 |
"format": "f32-to-bf16",
|
|
|
1576 |
{
|
1577 |
"name": "model.layers.21.mlp.down_proj.q_scale",
|
1578 |
"shape": [
|
1579 |
+
576,
|
1580 |
+
48
|
1581 |
],
|
1582 |
"dtype": "float16",
|
1583 |
"format": "f32-to-bf16",
|
|
|
1587 |
{
|
1588 |
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
|
1589 |
"shape": [
|
1590 |
+
3072,
|
1591 |
+
72
|
1592 |
],
|
1593 |
"dtype": "uint32",
|
1594 |
"format": "f32-to-bf16",
|
|
|
1598 |
{
|
1599 |
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
|
1600 |
"shape": [
|
1601 |
+
3072,
|
1602 |
+
18
|
1603 |
],
|
1604 |
"dtype": "float16",
|
1605 |
"format": "f32-to-bf16",
|
|
|
1619 |
{
|
1620 |
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
|
1621 |
"shape": [
|
1622 |
+
960,
|
1623 |
+
72
|
1624 |
],
|
1625 |
"dtype": "uint32",
|
1626 |
"format": "f32-to-bf16",
|
|
|
1630 |
{
|
1631 |
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
|
1632 |
"shape": [
|
1633 |
+
960,
|
1634 |
+
18
|
1635 |
],
|
1636 |
"dtype": "float16",
|
1637 |
"format": "f32-to-bf16",
|
|
|
1641 |
{
|
1642 |
"name": "model.layers.21.self_attn.o_proj.q_weight",
|
1643 |
"shape": [
|
1644 |
+
576,
|
1645 |
+
72
|
1646 |
],
|
1647 |
"dtype": "uint32",
|
1648 |
"format": "f32-to-bf16",
|
|
|
1652 |
{
|
1653 |
"name": "model.layers.21.self_attn.o_proj.q_scale",
|
1654 |
"shape": [
|
1655 |
+
576,
|
1656 |
+
18
|
1657 |
],
|
1658 |
"dtype": "float16",
|
1659 |
"format": "f32-to-bf16",
|
|
|
1673 |
{
|
1674 |
"name": "model.layers.22.mlp.down_proj.q_weight",
|
1675 |
"shape": [
|
1676 |
+
576,
|
1677 |
+
192
|
1678 |
],
|
1679 |
"dtype": "uint32",
|
1680 |
"format": "f32-to-bf16",
|
|
|
1684 |
{
|
1685 |
"name": "model.layers.22.mlp.down_proj.q_scale",
|
1686 |
"shape": [
|
1687 |
+
576,
|
1688 |
+
48
|
1689 |
],
|
1690 |
"dtype": "float16",
|
1691 |
"format": "f32-to-bf16",
|
|
|
1695 |
{
|
1696 |
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
|
1697 |
"shape": [
|
1698 |
+
3072,
|
1699 |
+
72
|
1700 |
],
|
1701 |
"dtype": "uint32",
|
1702 |
"format": "f32-to-bf16",
|
|
|
1706 |
{
|
1707 |
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
|
1708 |
"shape": [
|
1709 |
+
3072,
|
1710 |
+
18
|
1711 |
],
|
1712 |
"dtype": "float16",
|
1713 |
"format": "f32-to-bf16",
|
|
|
1727 |
{
|
1728 |
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
|
1729 |
"shape": [
|
1730 |
+
960,
|
1731 |
+
72
|
1732 |
],
|
1733 |
"dtype": "uint32",
|
1734 |
"format": "f32-to-bf16",
|
|
|
1738 |
{
|
1739 |
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
|
1740 |
"shape": [
|
1741 |
+
960,
|
1742 |
+
18
|
1743 |
],
|
1744 |
"dtype": "float16",
|
1745 |
"format": "f32-to-bf16",
|
|
|
1749 |
{
|
1750 |
"name": "model.layers.22.self_attn.o_proj.q_weight",
|
1751 |
"shape": [
|
1752 |
+
576,
|
1753 |
+
72
|
1754 |
],
|
1755 |
"dtype": "uint32",
|
1756 |
"format": "f32-to-bf16",
|
|
|
1760 |
{
|
1761 |
"name": "model.layers.22.self_attn.o_proj.q_scale",
|
1762 |
"shape": [
|
1763 |
+
576,
|
1764 |
+
18
|
1765 |
],
|
1766 |
"dtype": "float16",
|
1767 |
"format": "f32-to-bf16",
|
|
|
1781 |
{
|
1782 |
"name": "model.layers.23.mlp.down_proj.q_weight",
|
1783 |
"shape": [
|
1784 |
+
576,
|
1785 |
+
192
|
1786 |
],
|
1787 |
"dtype": "uint32",
|
1788 |
"format": "f32-to-bf16",
|
|
|
1792 |
{
|
1793 |
"name": "model.layers.23.mlp.down_proj.q_scale",
|
1794 |
"shape": [
|
1795 |
+
576,
|
1796 |
+
48
|
1797 |
],
|
1798 |
"dtype": "float16",
|
1799 |
"format": "f32-to-bf16",
|
|
|
1803 |
{
|
1804 |
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
|
1805 |
"shape": [
|
1806 |
+
3072,
|
1807 |
+
72
|
1808 |
],
|
1809 |
"dtype": "uint32",
|
1810 |
"format": "f32-to-bf16",
|
|
|
1814 |
{
|
1815 |
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
|
1816 |
"shape": [
|
1817 |
+
3072,
|
1818 |
+
18
|
1819 |
],
|
1820 |
"dtype": "float16",
|
1821 |
"format": "f32-to-bf16",
|
|
|
1835 |
{
|
1836 |
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
|
1837 |
"shape": [
|
1838 |
+
960,
|
1839 |
+
72
|
1840 |
],
|
1841 |
"dtype": "uint32",
|
1842 |
"format": "f32-to-bf16",
|
|
|
1846 |
{
|
1847 |
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
|
1848 |
"shape": [
|
1849 |
+
960,
|
1850 |
+
18
|
1851 |
],
|
1852 |
"dtype": "float16",
|
1853 |
"format": "f32-to-bf16",
|
|
|
1857 |
{
|
1858 |
"name": "model.layers.23.self_attn.o_proj.q_weight",
|
1859 |
"shape": [
|
1860 |
+
576,
|
1861 |
+
72
|
1862 |
],
|
1863 |
"dtype": "uint32",
|
1864 |
"format": "f32-to-bf16",
|
|
|
1868 |
{
|
1869 |
"name": "model.layers.23.self_attn.o_proj.q_scale",
|
1870 |
"shape": [
|
1871 |
+
576,
|
1872 |
+
18
|
1873 |
],
|
1874 |
"dtype": "float16",
|
1875 |
"format": "f32-to-bf16",
|
|
|
1889 |
{
|
1890 |
"name": "model.layers.24.mlp.down_proj.q_weight",
|
1891 |
"shape": [
|
1892 |
+
576,
|
1893 |
+
192
|
1894 |
],
|
1895 |
"dtype": "uint32",
|
1896 |
"format": "f32-to-bf16",
|
|
|
1900 |
{
|
1901 |
"name": "model.layers.24.mlp.down_proj.q_scale",
|
1902 |
"shape": [
|
1903 |
+
576,
|
1904 |
+
48
|
1905 |
],
|
1906 |
"dtype": "float16",
|
1907 |
"format": "f32-to-bf16",
|
|
|
1911 |
{
|
1912 |
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
|
1913 |
"shape": [
|
1914 |
+
3072,
|
1915 |
+
72
|
1916 |
],
|
1917 |
"dtype": "uint32",
|
1918 |
"format": "f32-to-bf16",
|
|
|
1922 |
{
|
1923 |
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
|
1924 |
"shape": [
|
1925 |
+
3072,
|
1926 |
+
18
|
1927 |
],
|
1928 |
"dtype": "float16",
|
1929 |
"format": "f32-to-bf16",
|
|
|
1943 |
{
|
1944 |
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
|
1945 |
"shape": [
|
1946 |
+
960,
|
1947 |
+
72
|
1948 |
],
|
1949 |
"dtype": "uint32",
|
1950 |
"format": "f32-to-bf16",
|
|
|
1954 |
{
|
1955 |
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
|
1956 |
"shape": [
|
1957 |
+
960,
|
1958 |
+
18
|
1959 |
],
|
1960 |
"dtype": "float16",
|
1961 |
"format": "f32-to-bf16",
|
|
|
1965 |
{
|
1966 |
"name": "model.layers.24.self_attn.o_proj.q_weight",
|
1967 |
"shape": [
|
1968 |
+
576,
|
1969 |
+
72
|
1970 |
],
|
1971 |
"dtype": "uint32",
|
1972 |
"format": "f32-to-bf16",
|
|
|
1976 |
{
|
1977 |
"name": "model.layers.24.self_attn.o_proj.q_scale",
|
1978 |
"shape": [
|
1979 |
+
576,
|
1980 |
+
18
|
1981 |
],
|
1982 |
"dtype": "float16",
|
1983 |
"format": "f32-to-bf16",
|
|
|
1997 |
{
|
1998 |
"name": "model.layers.25.mlp.down_proj.q_weight",
|
1999 |
"shape": [
|
2000 |
+
576,
|
2001 |
+
192
|
2002 |
],
|
2003 |
"dtype": "uint32",
|
2004 |
"format": "f32-to-bf16",
|
|
|
2008 |
{
|
2009 |
"name": "model.layers.25.mlp.down_proj.q_scale",
|
2010 |
"shape": [
|
2011 |
+
576,
|
2012 |
+
48
|
2013 |
],
|
2014 |
"dtype": "float16",
|
2015 |
"format": "f32-to-bf16",
|
|
|
2019 |
{
|
2020 |
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
|
2021 |
"shape": [
|
2022 |
+
3072,
|
2023 |
+
72
|
2024 |
],
|
2025 |
"dtype": "uint32",
|
2026 |
"format": "f32-to-bf16",
|
|
|
2030 |
{
|
2031 |
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
|
2032 |
"shape": [
|
2033 |
+
3072,
|
2034 |
+
18
|
2035 |
],
|
2036 |
"dtype": "float16",
|
2037 |
"format": "f32-to-bf16",
|
|
|
2051 |
{
|
2052 |
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
|
2053 |
"shape": [
|
2054 |
+
960,
|
2055 |
+
72
|
2056 |
],
|
2057 |
"dtype": "uint32",
|
2058 |
"format": "f32-to-bf16",
|
|
|
2062 |
{
|
2063 |
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
|
2064 |
"shape": [
|
2065 |
+
960,
|
2066 |
+
18
|
2067 |
],
|
2068 |
"dtype": "float16",
|
2069 |
"format": "f32-to-bf16",
|
|
|
2073 |
{
|
2074 |
"name": "model.layers.25.self_attn.o_proj.q_weight",
|
2075 |
"shape": [
|
2076 |
+
576,
|
2077 |
+
72
|
2078 |
],
|
2079 |
"dtype": "uint32",
|
2080 |
"format": "f32-to-bf16",
|
|
|
2084 |
{
|
2085 |
"name": "model.layers.25.self_attn.o_proj.q_scale",
|
2086 |
"shape": [
|
2087 |
+
576,
|
2088 |
+
18
|
2089 |
],
|
2090 |
"dtype": "float16",
|
2091 |
"format": "f32-to-bf16",
|
|
|
2105 |
{
|
2106 |
"name": "model.layers.26.mlp.down_proj.q_weight",
|
2107 |
"shape": [
|
2108 |
+
576,
|
2109 |
+
192
|
2110 |
],
|
2111 |
"dtype": "uint32",
|
2112 |
"format": "f32-to-bf16",
|
|
|
2116 |
{
|
2117 |
"name": "model.layers.26.mlp.down_proj.q_scale",
|
2118 |
"shape": [
|
2119 |
+
576,
|
2120 |
+
48
|
2121 |
],
|
2122 |
"dtype": "float16",
|
2123 |
"format": "f32-to-bf16",
|
|
|
2127 |
{
|
2128 |
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
|
2129 |
"shape": [
|
2130 |
+
3072,
|
2131 |
+
72
|
2132 |
],
|
2133 |
"dtype": "uint32",
|
2134 |
"format": "f32-to-bf16",
|
|
|
2138 |
{
|
2139 |
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
|
2140 |
"shape": [
|
2141 |
+
3072,
|
2142 |
+
18
|
2143 |
],
|
2144 |
"dtype": "float16",
|
2145 |
"format": "f32-to-bf16",
|
|
|
2159 |
{
|
2160 |
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
|
2161 |
"shape": [
|
2162 |
+
960,
|
2163 |
+
72
|
2164 |
],
|
2165 |
"dtype": "uint32",
|
2166 |
"format": "f32-to-bf16",
|
|
|
2170 |
{
|
2171 |
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
|
2172 |
"shape": [
|
2173 |
+
960,
|
2174 |
+
18
|
2175 |
],
|
2176 |
"dtype": "float16",
|
2177 |
"format": "f32-to-bf16",
|
|
|
2181 |
{
|
2182 |
"name": "model.layers.26.self_attn.o_proj.q_weight",
|
2183 |
"shape": [
|
2184 |
+
576,
|
2185 |
+
72
|
2186 |
],
|
2187 |
"dtype": "uint32",
|
2188 |
"format": "f32-to-bf16",
|
|
|
2192 |
{
|
2193 |
"name": "model.layers.26.self_attn.o_proj.q_scale",
|
2194 |
"shape": [
|
2195 |
+
576,
|
2196 |
+
18
|
2197 |
],
|
2198 |
"dtype": "float16",
|
2199 |
"format": "f32-to-bf16",
|
|
|
2213 |
{
|
2214 |
"name": "model.layers.27.mlp.down_proj.q_weight",
|
2215 |
"shape": [
|
2216 |
+
576,
|
2217 |
+
192
|
2218 |
],
|
2219 |
"dtype": "uint32",
|
2220 |
"format": "f32-to-bf16",
|
|
|
2224 |
{
|
2225 |
"name": "model.layers.27.mlp.down_proj.q_scale",
|
2226 |
"shape": [
|
2227 |
+
576,
|
2228 |
+
48
|
2229 |
],
|
2230 |
"dtype": "float16",
|
2231 |
"format": "f32-to-bf16",
|
|
|
2235 |
{
|
2236 |
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
|
2237 |
"shape": [
|
2238 |
+
3072,
|
2239 |
+
72
|
2240 |
],
|
2241 |
"dtype": "uint32",
|
2242 |
"format": "f32-to-bf16",
|
|
|
2246 |
{
|
2247 |
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
|
2248 |
"shape": [
|
2249 |
+
3072,
|
2250 |
+
18
|
2251 |
],
|
2252 |
"dtype": "float16",
|
2253 |
"format": "f32-to-bf16",
|
|
|
2267 |
{
|
2268 |
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
|
2269 |
"shape": [
|
2270 |
+
960,
|
2271 |
+
72
|
2272 |
],
|
2273 |
"dtype": "uint32",
|
2274 |
"format": "f32-to-bf16",
|
|
|
2278 |
{
|
2279 |
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
|
2280 |
"shape": [
|
2281 |
+
960,
|
2282 |
+
18
|
2283 |
],
|
2284 |
"dtype": "float16",
|
2285 |
"format": "f32-to-bf16",
|
|
|
2289 |
{
|
2290 |
"name": "model.layers.27.self_attn.o_proj.q_weight",
|
2291 |
"shape": [
|
2292 |
+
576,
|
2293 |
+
72
|
2294 |
],
|
2295 |
"dtype": "uint32",
|
2296 |
"format": "f32-to-bf16",
|
|
|
2300 |
{
|
2301 |
"name": "model.layers.27.self_attn.o_proj.q_scale",
|
2302 |
"shape": [
|
2303 |
+
576,
|
2304 |
+
18
|
2305 |
],
|
2306 |
"dtype": "float16",
|
2307 |
"format": "f32-to-bf16",
|
|
|
2321 |
{
|
2322 |
"name": "model.layers.28.mlp.down_proj.q_weight",
|
2323 |
"shape": [
|
2324 |
+
576,
|
2325 |
+
192
|
2326 |
],
|
2327 |
"dtype": "uint32",
|
2328 |
"format": "f32-to-bf16",
|
|
|
2332 |
{
|
2333 |
"name": "model.layers.28.mlp.down_proj.q_scale",
|
2334 |
"shape": [
|
2335 |
+
576,
|
2336 |
+
48
|
2337 |
],
|
2338 |
"dtype": "float16",
|
2339 |
"format": "f32-to-bf16",
|
|
|
2343 |
{
|
2344 |
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
|
2345 |
"shape": [
|
2346 |
+
3072,
|
2347 |
+
72
|
2348 |
],
|
2349 |
"dtype": "uint32",
|
2350 |
"format": "f32-to-bf16",
|
|
|
2354 |
{
|
2355 |
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
|
2356 |
"shape": [
|
2357 |
+
3072,
|
2358 |
+
18
|
2359 |
],
|
2360 |
"dtype": "float16",
|
2361 |
"format": "f32-to-bf16",
|
|
|
2375 |
{
|
2376 |
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
|
2377 |
"shape": [
|
2378 |
+
960,
|
2379 |
+
72
|
2380 |
],
|
2381 |
"dtype": "uint32",
|
2382 |
"format": "f32-to-bf16",
|
|
|
2386 |
{
|
2387 |
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
|
2388 |
"shape": [
|
2389 |
+
960,
|
2390 |
+
18
|
2391 |
],
|
2392 |
"dtype": "float16",
|
2393 |
"format": "f32-to-bf16",
|
|
|
2397 |
{
|
2398 |
"name": "model.layers.28.self_attn.o_proj.q_weight",
|
2399 |
"shape": [
|
2400 |
+
576,
|
2401 |
+
72
|
2402 |
],
|
2403 |
"dtype": "uint32",
|
2404 |
"format": "f32-to-bf16",
|
|
|
2408 |
{
|
2409 |
"name": "model.layers.28.self_attn.o_proj.q_scale",
|
2410 |
"shape": [
|
2411 |
+
576,
|
2412 |
+
18
|
2413 |
],
|
2414 |
"dtype": "float16",
|
2415 |
"format": "f32-to-bf16",
|
|
|
2429 |
{
|
2430 |
"name": "model.layers.29.mlp.down_proj.q_weight",
|
2431 |
"shape": [
|
2432 |
+
576,
|
2433 |
+
192
|
2434 |
],
|
2435 |
"dtype": "uint32",
|
2436 |
"format": "f32-to-bf16",
|
|
|
2440 |
{
|
2441 |
"name": "model.layers.29.mlp.down_proj.q_scale",
|
2442 |
"shape": [
|
2443 |
+
576,
|
2444 |
+
48
|
2445 |
],
|
2446 |
"dtype": "float16",
|
2447 |
"format": "f32-to-bf16",
|
|
|
2451 |
{
|
2452 |
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
|
2453 |
"shape": [
|
2454 |
+
3072,
|
2455 |
+
72
|
2456 |
],
|
2457 |
"dtype": "uint32",
|
2458 |
"format": "f32-to-bf16",
|
|
|
2462 |
{
|
2463 |
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
|
2464 |
"shape": [
|
2465 |
+
3072,
|
2466 |
+
18
|
2467 |
],
|
2468 |
"dtype": "float16",
|
2469 |
"format": "f32-to-bf16",
|
|
|
2483 |
{
|
2484 |
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
|
2485 |
"shape": [
|
2486 |
+
960,
|
2487 |
+
72
|
2488 |
],
|
2489 |
"dtype": "uint32",
|
2490 |
"format": "f32-to-bf16",
|
|
|
2494 |
{
|
2495 |
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
|
2496 |
"shape": [
|
2497 |
+
960,
|
2498 |
+
18
|
2499 |
],
|
2500 |
"dtype": "float16",
|
2501 |
"format": "f32-to-bf16",
|
|
|
2505 |
{
|
2506 |
"name": "model.layers.29.self_attn.o_proj.q_weight",
|
2507 |
"shape": [
|
2508 |
+
576,
|
2509 |
+
72
|
2510 |
],
|
2511 |
"dtype": "uint32",
|
2512 |
"format": "f32-to-bf16",
|
|
|
2516 |
{
|
2517 |
"name": "model.layers.29.self_attn.o_proj.q_scale",
|
2518 |
"shape": [
|
2519 |
+
576,
|
2520 |
+
18
|
2521 |
],
|
2522 |
"dtype": "float16",
|
2523 |
"format": "f32-to-bf16",
|
|
|
2537 |
{
|
2538 |
"name": "model.layers.3.mlp.down_proj.q_weight",
|
2539 |
"shape": [
|
2540 |
+
576,
|
2541 |
+
192
|
2542 |
],
|
2543 |
"dtype": "uint32",
|
2544 |
"format": "f32-to-bf16",
|
|
|
2548 |
{
|
2549 |
"name": "model.layers.3.mlp.down_proj.q_scale",
|
2550 |
"shape": [
|
2551 |
+
576,
|
2552 |
+
48
|
2553 |
],
|
2554 |
"dtype": "float16",
|
2555 |
"format": "f32-to-bf16",
|
|
|
2559 |
{
|
2560 |
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
|
2561 |
"shape": [
|
2562 |
+
3072,
|
2563 |
+
72
|
2564 |
],
|
2565 |
"dtype": "uint32",
|
2566 |
"format": "f32-to-bf16",
|
|
|
2570 |
{
|
2571 |
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
|
2572 |
"shape": [
|
2573 |
+
3072,
|
2574 |
+
18
|
2575 |
],
|
2576 |
"dtype": "float16",
|
2577 |
"format": "f32-to-bf16",
|
|
|
2591 |
{
|
2592 |
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
|
2593 |
"shape": [
|
2594 |
+
960,
|
2595 |
+
72
|
2596 |
],
|
2597 |
"dtype": "uint32",
|
2598 |
"format": "f32-to-bf16",
|
|
|
2602 |
{
|
2603 |
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
|
2604 |
"shape": [
|
2605 |
+
960,
|
2606 |
+
18
|
2607 |
],
|
2608 |
"dtype": "float16",
|
2609 |
"format": "f32-to-bf16",
|
|
|
2613 |
{
|
2614 |
"name": "model.layers.3.self_attn.o_proj.q_weight",
|
2615 |
"shape": [
|
2616 |
+
576,
|
2617 |
+
72
|
2618 |
],
|
2619 |
"dtype": "uint32",
|
2620 |
"format": "f32-to-bf16",
|
|
|
2624 |
{
|
2625 |
"name": "model.layers.3.self_attn.o_proj.q_scale",
|
2626 |
"shape": [
|
2627 |
+
576,
|
2628 |
+
18
|
2629 |
],
|
2630 |
"dtype": "float16",
|
2631 |
"format": "f32-to-bf16",
|
|
|
2645 |
{
|
2646 |
"name": "model.layers.4.mlp.down_proj.q_weight",
|
2647 |
"shape": [
|
2648 |
+
576,
|
2649 |
+
192
|
2650 |
],
|
2651 |
"dtype": "uint32",
|
2652 |
"format": "f32-to-bf16",
|
|
|
2656 |
{
|
2657 |
"name": "model.layers.4.mlp.down_proj.q_scale",
|
2658 |
"shape": [
|
2659 |
+
576,
|
2660 |
+
48
|
2661 |
],
|
2662 |
"dtype": "float16",
|
2663 |
"format": "f32-to-bf16",
|
|
|
2667 |
{
|
2668 |
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
|
2669 |
"shape": [
|
2670 |
+
3072,
|
2671 |
+
72
|
2672 |
],
|
2673 |
"dtype": "uint32",
|
2674 |
"format": "f32-to-bf16",
|
|
|
2678 |
{
|
2679 |
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
|
2680 |
"shape": [
|
2681 |
+
3072,
|
2682 |
+
18
|
2683 |
],
|
2684 |
"dtype": "float16",
|
2685 |
"format": "f32-to-bf16",
|
|
|
2699 |
{
|
2700 |
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
|
2701 |
"shape": [
|
2702 |
+
960,
|
2703 |
+
72
|
2704 |
],
|
2705 |
"dtype": "uint32",
|
2706 |
"format": "f32-to-bf16",
|
|
|
2710 |
{
|
2711 |
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
|
2712 |
"shape": [
|
2713 |
+
960,
|
2714 |
+
18
|
2715 |
],
|
2716 |
"dtype": "float16",
|
2717 |
"format": "f32-to-bf16",
|
|
|
2721 |
{
|
2722 |
"name": "model.layers.4.self_attn.o_proj.q_weight",
|
2723 |
"shape": [
|
2724 |
+
576,
|
2725 |
+
72
|
2726 |
],
|
2727 |
"dtype": "uint32",
|
2728 |
"format": "f32-to-bf16",
|
|
|
2732 |
{
|
2733 |
"name": "model.layers.4.self_attn.o_proj.q_scale",
|
2734 |
"shape": [
|
2735 |
+
576,
|
2736 |
+
18
|
2737 |
],
|
2738 |
"dtype": "float16",
|
2739 |
"format": "f32-to-bf16",
|
|
|
2753 |
{
|
2754 |
"name": "model.layers.5.mlp.down_proj.q_weight",
|
2755 |
"shape": [
|
2756 |
+
576,
|
2757 |
+
192
|
2758 |
],
|
2759 |
"dtype": "uint32",
|
2760 |
"format": "f32-to-bf16",
|
|
|
2764 |
{
|
2765 |
"name": "model.layers.5.mlp.down_proj.q_scale",
|
2766 |
"shape": [
|
2767 |
+
576,
|
2768 |
+
48
|
2769 |
],
|
2770 |
"dtype": "float16",
|
2771 |
"format": "f32-to-bf16",
|
|
|
2773 |
"byteOffset": 32828544
|
2774 |
}
|
2775 |
],
|
2776 |
+
"md5sum": "b31a66d8b09f833eba9e80bd7b3d8cac"
|
2777 |
},
|
2778 |
{
|
2779 |
"dataPath": "params_shard_2.bin",
|
|
|
2783 |
{
|
2784 |
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
|
2785 |
"shape": [
|
2786 |
+
3072,
|
2787 |
+
72
|
2788 |
],
|
2789 |
"dtype": "uint32",
|
2790 |
"format": "f32-to-bf16",
|
|
|
2794 |
{
|
2795 |
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
|
2796 |
"shape": [
|
2797 |
+
3072,
|
2798 |
+
18
|
2799 |
],
|
2800 |
"dtype": "float16",
|
2801 |
"format": "f32-to-bf16",
|
|
|
2815 |
{
|
2816 |
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
|
2817 |
"shape": [
|
2818 |
+
960,
|
2819 |
+
72
|
2820 |
],
|
2821 |
"dtype": "uint32",
|
2822 |
"format": "f32-to-bf16",
|
|
|
2826 |
{
|
2827 |
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
|
2828 |
"shape": [
|
2829 |
+
960,
|
2830 |
+
18
|
2831 |
],
|
2832 |
"dtype": "float16",
|
2833 |
"format": "f32-to-bf16",
|
|
|
2837 |
{
|
2838 |
"name": "model.layers.5.self_attn.o_proj.q_weight",
|
2839 |
"shape": [
|
2840 |
+
576,
|
2841 |
+
72
|
2842 |
],
|
2843 |
"dtype": "uint32",
|
2844 |
"format": "f32-to-bf16",
|
|
|
2848 |
{
|
2849 |
"name": "model.layers.5.self_attn.o_proj.q_scale",
|
2850 |
"shape": [
|
2851 |
+
576,
|
2852 |
+
18
|
2853 |
],
|
2854 |
"dtype": "float16",
|
2855 |
"format": "f32-to-bf16",
|
|
|
2869 |
{
|
2870 |
"name": "model.layers.6.mlp.down_proj.q_weight",
|
2871 |
"shape": [
|
2872 |
+
576,
|
2873 |
+
192
|
2874 |
],
|
2875 |
"dtype": "uint32",
|
2876 |
"format": "f32-to-bf16",
|
|
|
2880 |
{
|
2881 |
"name": "model.layers.6.mlp.down_proj.q_scale",
|
2882 |
"shape": [
|
2883 |
+
576,
|
2884 |
+
48
|
2885 |
],
|
2886 |
"dtype": "float16",
|
2887 |
"format": "f32-to-bf16",
|
|
|
2891 |
{
|
2892 |
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
|
2893 |
"shape": [
|
2894 |
+
3072,
|
2895 |
+
72
|
2896 |
],
|
2897 |
"dtype": "uint32",
|
2898 |
"format": "f32-to-bf16",
|
|
|
2902 |
{
|
2903 |
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
|
2904 |
"shape": [
|
2905 |
+
3072,
|
2906 |
+
18
|
2907 |
],
|
2908 |
"dtype": "float16",
|
2909 |
"format": "f32-to-bf16",
|
|
|
2923 |
{
|
2924 |
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
|
2925 |
"shape": [
|
2926 |
+
960,
|
2927 |
+
72
|
2928 |
],
|
2929 |
"dtype": "uint32",
|
2930 |
"format": "f32-to-bf16",
|
|
|
2934 |
{
|
2935 |
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
|
2936 |
"shape": [
|
2937 |
+
960,
|
2938 |
+
18
|
2939 |
],
|
2940 |
"dtype": "float16",
|
2941 |
"format": "f32-to-bf16",
|
|
|
2945 |
{
|
2946 |
"name": "model.layers.6.self_attn.o_proj.q_weight",
|
2947 |
"shape": [
|
2948 |
+
576,
|
2949 |
+
72
|
2950 |
],
|
2951 |
"dtype": "uint32",
|
2952 |
"format": "f32-to-bf16",
|
|
|
2956 |
{
|
2957 |
"name": "model.layers.6.self_attn.o_proj.q_scale",
|
2958 |
"shape": [
|
2959 |
+
576,
|
2960 |
+
18
|
2961 |
],
|
2962 |
"dtype": "float16",
|
2963 |
"format": "f32-to-bf16",
|
|
|
2977 |
{
|
2978 |
"name": "model.layers.7.mlp.down_proj.q_weight",
|
2979 |
"shape": [
|
2980 |
+
576,
|
2981 |
+
192
|
2982 |
],
|
2983 |
"dtype": "uint32",
|
2984 |
"format": "f32-to-bf16",
|
|
|
2988 |
{
|
2989 |
"name": "model.layers.7.mlp.down_proj.q_scale",
|
2990 |
"shape": [
|
2991 |
+
576,
|
2992 |
+
48
|
2993 |
],
|
2994 |
"dtype": "float16",
|
2995 |
"format": "f32-to-bf16",
|
|
|
2999 |
{
|
3000 |
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
|
3001 |
"shape": [
|
3002 |
+
3072,
|
3003 |
+
72
|
3004 |
],
|
3005 |
"dtype": "uint32",
|
3006 |
"format": "f32-to-bf16",
|
|
|
3010 |
{
|
3011 |
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
|
3012 |
"shape": [
|
3013 |
+
3072,
|
3014 |
+
18
|
3015 |
],
|
3016 |
"dtype": "float16",
|
3017 |
"format": "f32-to-bf16",
|
|
|
3031 |
{
|
3032 |
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
|
3033 |
"shape": [
|
3034 |
+
960,
|
3035 |
+
72
|
3036 |
],
|
3037 |
"dtype": "uint32",
|
3038 |
"format": "f32-to-bf16",
|
|
|
3042 |
{
|
3043 |
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
|
3044 |
"shape": [
|
3045 |
+
960,
|
3046 |
+
18
|
3047 |
],
|
3048 |
"dtype": "float16",
|
3049 |
"format": "f32-to-bf16",
|
|
|
3053 |
{
|
3054 |
"name": "model.layers.7.self_attn.o_proj.q_weight",
|
3055 |
"shape": [
|
3056 |
+
576,
|
3057 |
+
72
|
3058 |
],
|
3059 |
"dtype": "uint32",
|
3060 |
"format": "f32-to-bf16",
|
|
|
3064 |
{
|
3065 |
"name": "model.layers.7.self_attn.o_proj.q_scale",
|
3066 |
"shape": [
|
3067 |
+
576,
|
3068 |
+
18
|
3069 |
],
|
3070 |
"dtype": "float16",
|
3071 |
"format": "f32-to-bf16",
|
|
|
3085 |
{
|
3086 |
"name": "model.layers.8.mlp.down_proj.q_weight",
|
3087 |
"shape": [
|
3088 |
+
576,
|
3089 |
+
192
|
3090 |
],
|
3091 |
"dtype": "uint32",
|
3092 |
"format": "f32-to-bf16",
|
|
|
3096 |
{
|
3097 |
"name": "model.layers.8.mlp.down_proj.q_scale",
|
3098 |
"shape": [
|
3099 |
+
576,
|
3100 |
+
48
|
3101 |
],
|
3102 |
"dtype": "float16",
|
3103 |
"format": "f32-to-bf16",
|
|
|
3107 |
{
|
3108 |
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
|
3109 |
"shape": [
|
3110 |
+
3072,
|
3111 |
+
72
|
3112 |
],
|
3113 |
"dtype": "uint32",
|
3114 |
"format": "f32-to-bf16",
|
|
|
3118 |
{
|
3119 |
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
|
3120 |
"shape": [
|
3121 |
+
3072,
|
3122 |
+
18
|
3123 |
],
|
3124 |
"dtype": "float16",
|
3125 |
"format": "f32-to-bf16",
|
|
|
3139 |
{
|
3140 |
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
|
3141 |
"shape": [
|
3142 |
+
960,
|
3143 |
+
72
|
3144 |
],
|
3145 |
"dtype": "uint32",
|
3146 |
"format": "f32-to-bf16",
|
|
|
3150 |
{
|
3151 |
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
|
3152 |
"shape": [
|
3153 |
+
960,
|
3154 |
+
18
|
3155 |
],
|
3156 |
"dtype": "float16",
|
3157 |
"format": "f32-to-bf16",
|
|
|
3161 |
{
|
3162 |
"name": "model.layers.8.self_attn.o_proj.q_weight",
|
3163 |
"shape": [
|
3164 |
+
576,
|
3165 |
+
72
|
3166 |
],
|
3167 |
"dtype": "uint32",
|
3168 |
"format": "f32-to-bf16",
|
|
|
3172 |
{
|
3173 |
"name": "model.layers.8.self_attn.o_proj.q_scale",
|
3174 |
"shape": [
|
3175 |
+
576,
|
3176 |
+
18
|
3177 |
],
|
3178 |
"dtype": "float16",
|
3179 |
"format": "f32-to-bf16",
|
|
|
3193 |
{
|
3194 |
"name": "model.layers.9.mlp.down_proj.q_weight",
|
3195 |
"shape": [
|
3196 |
+
576,
|
3197 |
+
192
|
3198 |
],
|
3199 |
"dtype": "uint32",
|
3200 |
"format": "f32-to-bf16",
|
|
|
3204 |
{
|
3205 |
"name": "model.layers.9.mlp.down_proj.q_scale",
|
3206 |
"shape": [
|
3207 |
+
576,
|
3208 |
+
48
|
3209 |
],
|
3210 |
"dtype": "float16",
|
3211 |
"format": "f32-to-bf16",
|
|
|
3215 |
{
|
3216 |
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
|
3217 |
"shape": [
|
3218 |
+
3072,
|
3219 |
+
72
|
3220 |
],
|
3221 |
"dtype": "uint32",
|
3222 |
"format": "f32-to-bf16",
|
|
|
3226 |
{
|
3227 |
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
|
3228 |
"shape": [
|
3229 |
+
3072,
|
3230 |
+
18
|
3231 |
],
|
3232 |
"dtype": "float16",
|
3233 |
"format": "f32-to-bf16",
|
|
|
3247 |
{
|
3248 |
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
|
3249 |
"shape": [
|
3250 |
+
960,
|
3251 |
+
72
|
3252 |
],
|
3253 |
"dtype": "uint32",
|
3254 |
"format": "f32-to-bf16",
|
|
|
3258 |
{
|
3259 |
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
|
3260 |
"shape": [
|
3261 |
+
960,
|
3262 |
+
18
|
3263 |
],
|
3264 |
"dtype": "float16",
|
3265 |
"format": "f32-to-bf16",
|
|
|
3269 |
{
|
3270 |
"name": "model.layers.9.self_attn.o_proj.q_weight",
|
3271 |
"shape": [
|
3272 |
+
576,
|
3273 |
+
72
|
3274 |
],
|
3275 |
"dtype": "uint32",
|
3276 |
"format": "f32-to-bf16",
|
|
|
3280 |
{
|
3281 |
"name": "model.layers.9.self_attn.o_proj.q_scale",
|
3282 |
"shape": [
|
3283 |
+
576,
|
3284 |
+
18
|
3285 |
],
|
3286 |
"dtype": "float16",
|
3287 |
"format": "f32-to-bf16",
|
|
|
3299 |
"byteOffset": 9465984
|
3300 |
}
|
3301 |
],
|
3302 |
+
"md5sum": "c3261fefb84bf0a587f6b809c586f774"
|
3303 |
}
|
3304 |
]
|
3305 |
}
|
params_shard_0.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 33364548
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6a23cedd1d63be5a27a9eac044e9eb9c37119da510f85f34636fefcfbb498815
|
3 |
size 33364548
|
params_shard_1.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 32883840
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b54c0abc4d7bd83c814daeae4dde2ce272c5fe19f8ca2c0477e635159e3b2be9
|
3 |
size 32883840
|
params_shard_2.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 9467136
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:452584b4734e87b7e108b734e61a81d9af433d38bdc76be757fc02e068be3c18
|
3 |
size 9467136
|