Saurav Panda commited on
Commit
0ffcd1d
·
1 Parent(s): 19020e6

Fixing model loading issue

Browse files
ndarray-cache.json CHANGED
@@ -45,8 +45,8 @@
45
  {
46
  "name": "model.layers.0.mlp.down_proj.q_weight",
47
  "shape": [
48
- 192,
49
- 576
50
  ],
51
  "dtype": "uint32",
52
  "format": "f32-to-bf16",
@@ -56,8 +56,8 @@
56
  {
57
  "name": "model.layers.0.mlp.down_proj.q_scale",
58
  "shape": [
59
- 48,
60
- 576
61
  ],
62
  "dtype": "float16",
63
  "format": "f32-to-bf16",
@@ -67,8 +67,8 @@
67
  {
68
  "name": "model.layers.0.mlp.gate_up_proj.q_weight",
69
  "shape": [
70
- 72,
71
- 3072
72
  ],
73
  "dtype": "uint32",
74
  "format": "f32-to-bf16",
@@ -78,8 +78,8 @@
78
  {
79
  "name": "model.layers.0.mlp.gate_up_proj.q_scale",
80
  "shape": [
81
- 18,
82
- 3072
83
  ],
84
  "dtype": "float16",
85
  "format": "f32-to-bf16",
@@ -99,8 +99,8 @@
99
  {
100
  "name": "model.layers.0.self_attn.qkv_proj.q_weight",
101
  "shape": [
102
- 72,
103
- 960
104
  ],
105
  "dtype": "uint32",
106
  "format": "f32-to-bf16",
@@ -110,8 +110,8 @@
110
  {
111
  "name": "model.layers.0.self_attn.qkv_proj.q_scale",
112
  "shape": [
113
- 18,
114
- 960
115
  ],
116
  "dtype": "float16",
117
  "format": "f32-to-bf16",
@@ -121,8 +121,8 @@
121
  {
122
  "name": "model.layers.0.self_attn.o_proj.q_weight",
123
  "shape": [
124
- 72,
125
- 576
126
  ],
127
  "dtype": "uint32",
128
  "format": "f32-to-bf16",
@@ -132,8 +132,8 @@
132
  {
133
  "name": "model.layers.0.self_attn.o_proj.q_scale",
134
  "shape": [
135
- 18,
136
- 576
137
  ],
138
  "dtype": "float16",
139
  "format": "f32-to-bf16",
@@ -153,8 +153,8 @@
153
  {
154
  "name": "model.layers.1.mlp.down_proj.q_weight",
155
  "shape": [
156
- 192,
157
- 576
158
  ],
159
  "dtype": "uint32",
160
  "format": "f32-to-bf16",
@@ -164,8 +164,8 @@
164
  {
165
  "name": "model.layers.1.mlp.down_proj.q_scale",
166
  "shape": [
167
- 48,
168
- 576
169
  ],
170
  "dtype": "float16",
171
  "format": "f32-to-bf16",
@@ -175,8 +175,8 @@
175
  {
176
  "name": "model.layers.1.mlp.gate_up_proj.q_weight",
177
  "shape": [
178
- 72,
179
- 3072
180
  ],
181
  "dtype": "uint32",
182
  "format": "f32-to-bf16",
@@ -186,8 +186,8 @@
186
  {
187
  "name": "model.layers.1.mlp.gate_up_proj.q_scale",
188
  "shape": [
189
- 18,
190
- 3072
191
  ],
192
  "dtype": "float16",
193
  "format": "f32-to-bf16",
@@ -207,8 +207,8 @@
207
  {
208
  "name": "model.layers.1.self_attn.qkv_proj.q_weight",
209
  "shape": [
210
- 72,
211
- 960
212
  ],
213
  "dtype": "uint32",
214
  "format": "f32-to-bf16",
@@ -218,8 +218,8 @@
218
  {
219
  "name": "model.layers.1.self_attn.qkv_proj.q_scale",
220
  "shape": [
221
- 18,
222
- 960
223
  ],
224
  "dtype": "float16",
225
  "format": "f32-to-bf16",
@@ -229,8 +229,8 @@
229
  {
230
  "name": "model.layers.1.self_attn.o_proj.q_weight",
231
  "shape": [
232
- 72,
233
- 576
234
  ],
235
  "dtype": "uint32",
236
  "format": "f32-to-bf16",
@@ -240,8 +240,8 @@
240
  {
241
  "name": "model.layers.1.self_attn.o_proj.q_scale",
242
  "shape": [
243
- 18,
244
- 576
245
  ],
246
  "dtype": "float16",
247
  "format": "f32-to-bf16",
@@ -261,8 +261,8 @@
261
  {
262
  "name": "model.layers.10.mlp.down_proj.q_weight",
263
  "shape": [
264
- 192,
265
- 576
266
  ],
267
  "dtype": "uint32",
268
  "format": "f32-to-bf16",
@@ -272,8 +272,8 @@
272
  {
273
  "name": "model.layers.10.mlp.down_proj.q_scale",
274
  "shape": [
275
- 48,
276
- 576
277
  ],
278
  "dtype": "float16",
279
  "format": "f32-to-bf16",
@@ -283,8 +283,8 @@
283
  {
284
  "name": "model.layers.10.mlp.gate_up_proj.q_weight",
285
  "shape": [
286
- 72,
287
- 3072
288
  ],
289
  "dtype": "uint32",
290
  "format": "f32-to-bf16",
@@ -294,8 +294,8 @@
294
  {
295
  "name": "model.layers.10.mlp.gate_up_proj.q_scale",
296
  "shape": [
297
- 18,
298
- 3072
299
  ],
300
  "dtype": "float16",
301
  "format": "f32-to-bf16",
@@ -315,8 +315,8 @@
315
  {
316
  "name": "model.layers.10.self_attn.qkv_proj.q_weight",
317
  "shape": [
318
- 72,
319
- 960
320
  ],
321
  "dtype": "uint32",
322
  "format": "f32-to-bf16",
@@ -326,8 +326,8 @@
326
  {
327
  "name": "model.layers.10.self_attn.qkv_proj.q_scale",
328
  "shape": [
329
- 18,
330
- 960
331
  ],
332
  "dtype": "float16",
333
  "format": "f32-to-bf16",
@@ -337,8 +337,8 @@
337
  {
338
  "name": "model.layers.10.self_attn.o_proj.q_weight",
339
  "shape": [
340
- 72,
341
- 576
342
  ],
343
  "dtype": "uint32",
344
  "format": "f32-to-bf16",
@@ -348,8 +348,8 @@
348
  {
349
  "name": "model.layers.10.self_attn.o_proj.q_scale",
350
  "shape": [
351
- 18,
352
- 576
353
  ],
354
  "dtype": "float16",
355
  "format": "f32-to-bf16",
@@ -369,8 +369,8 @@
369
  {
370
  "name": "model.layers.11.mlp.down_proj.q_weight",
371
  "shape": [
372
- 192,
373
- 576
374
  ],
375
  "dtype": "uint32",
376
  "format": "f32-to-bf16",
@@ -380,8 +380,8 @@
380
  {
381
  "name": "model.layers.11.mlp.down_proj.q_scale",
382
  "shape": [
383
- 48,
384
- 576
385
  ],
386
  "dtype": "float16",
387
  "format": "f32-to-bf16",
@@ -391,8 +391,8 @@
391
  {
392
  "name": "model.layers.11.mlp.gate_up_proj.q_weight",
393
  "shape": [
394
- 72,
395
- 3072
396
  ],
397
  "dtype": "uint32",
398
  "format": "f32-to-bf16",
@@ -402,8 +402,8 @@
402
  {
403
  "name": "model.layers.11.mlp.gate_up_proj.q_scale",
404
  "shape": [
405
- 18,
406
- 3072
407
  ],
408
  "dtype": "float16",
409
  "format": "f32-to-bf16",
@@ -423,8 +423,8 @@
423
  {
424
  "name": "model.layers.11.self_attn.qkv_proj.q_weight",
425
  "shape": [
426
- 72,
427
- 960
428
  ],
429
  "dtype": "uint32",
430
  "format": "f32-to-bf16",
@@ -434,8 +434,8 @@
434
  {
435
  "name": "model.layers.11.self_attn.qkv_proj.q_scale",
436
  "shape": [
437
- 18,
438
- 960
439
  ],
440
  "dtype": "float16",
441
  "format": "f32-to-bf16",
@@ -445,8 +445,8 @@
445
  {
446
  "name": "model.layers.11.self_attn.o_proj.q_weight",
447
  "shape": [
448
- 72,
449
- 576
450
  ],
451
  "dtype": "uint32",
452
  "format": "f32-to-bf16",
@@ -456,8 +456,8 @@
456
  {
457
  "name": "model.layers.11.self_attn.o_proj.q_scale",
458
  "shape": [
459
- 18,
460
- 576
461
  ],
462
  "dtype": "float16",
463
  "format": "f32-to-bf16",
@@ -477,8 +477,8 @@
477
  {
478
  "name": "model.layers.12.mlp.down_proj.q_weight",
479
  "shape": [
480
- 192,
481
- 576
482
  ],
483
  "dtype": "uint32",
484
  "format": "f32-to-bf16",
@@ -488,8 +488,8 @@
488
  {
489
  "name": "model.layers.12.mlp.down_proj.q_scale",
490
  "shape": [
491
- 48,
492
- 576
493
  ],
494
  "dtype": "float16",
495
  "format": "f32-to-bf16",
@@ -499,8 +499,8 @@
499
  {
500
  "name": "model.layers.12.mlp.gate_up_proj.q_weight",
501
  "shape": [
502
- 72,
503
- 3072
504
  ],
505
  "dtype": "uint32",
506
  "format": "f32-to-bf16",
@@ -510,8 +510,8 @@
510
  {
511
  "name": "model.layers.12.mlp.gate_up_proj.q_scale",
512
  "shape": [
513
- 18,
514
- 3072
515
  ],
516
  "dtype": "float16",
517
  "format": "f32-to-bf16",
@@ -531,8 +531,8 @@
531
  {
532
  "name": "model.layers.12.self_attn.qkv_proj.q_weight",
533
  "shape": [
534
- 72,
535
- 960
536
  ],
537
  "dtype": "uint32",
538
  "format": "f32-to-bf16",
@@ -542,8 +542,8 @@
542
  {
543
  "name": "model.layers.12.self_attn.qkv_proj.q_scale",
544
  "shape": [
545
- 18,
546
- 960
547
  ],
548
  "dtype": "float16",
549
  "format": "f32-to-bf16",
@@ -553,8 +553,8 @@
553
  {
554
  "name": "model.layers.12.self_attn.o_proj.q_weight",
555
  "shape": [
556
- 72,
557
- 576
558
  ],
559
  "dtype": "uint32",
560
  "format": "f32-to-bf16",
@@ -564,8 +564,8 @@
564
  {
565
  "name": "model.layers.12.self_attn.o_proj.q_scale",
566
  "shape": [
567
- 18,
568
- 576
569
  ],
570
  "dtype": "float16",
571
  "format": "f32-to-bf16",
@@ -585,8 +585,8 @@
585
  {
586
  "name": "model.layers.13.mlp.down_proj.q_weight",
587
  "shape": [
588
- 192,
589
- 576
590
  ],
591
  "dtype": "uint32",
592
  "format": "f32-to-bf16",
@@ -596,8 +596,8 @@
596
  {
597
  "name": "model.layers.13.mlp.down_proj.q_scale",
598
  "shape": [
599
- 48,
600
- 576
601
  ],
602
  "dtype": "float16",
603
  "format": "f32-to-bf16",
@@ -607,8 +607,8 @@
607
  {
608
  "name": "model.layers.13.mlp.gate_up_proj.q_weight",
609
  "shape": [
610
- 72,
611
- 3072
612
  ],
613
  "dtype": "uint32",
614
  "format": "f32-to-bf16",
@@ -618,8 +618,8 @@
618
  {
619
  "name": "model.layers.13.mlp.gate_up_proj.q_scale",
620
  "shape": [
621
- 18,
622
- 3072
623
  ],
624
  "dtype": "float16",
625
  "format": "f32-to-bf16",
@@ -639,8 +639,8 @@
639
  {
640
  "name": "model.layers.13.self_attn.qkv_proj.q_weight",
641
  "shape": [
642
- 72,
643
- 960
644
  ],
645
  "dtype": "uint32",
646
  "format": "f32-to-bf16",
@@ -650,8 +650,8 @@
650
  {
651
  "name": "model.layers.13.self_attn.qkv_proj.q_scale",
652
  "shape": [
653
- 18,
654
- 960
655
  ],
656
  "dtype": "float16",
657
  "format": "f32-to-bf16",
@@ -661,8 +661,8 @@
661
  {
662
  "name": "model.layers.13.self_attn.o_proj.q_weight",
663
  "shape": [
664
- 72,
665
- 576
666
  ],
667
  "dtype": "uint32",
668
  "format": "f32-to-bf16",
@@ -672,8 +672,8 @@
672
  {
673
  "name": "model.layers.13.self_attn.o_proj.q_scale",
674
  "shape": [
675
- 18,
676
- 576
677
  ],
678
  "dtype": "float16",
679
  "format": "f32-to-bf16",
@@ -693,8 +693,8 @@
693
  {
694
  "name": "model.layers.14.mlp.down_proj.q_weight",
695
  "shape": [
696
- 192,
697
- 576
698
  ],
699
  "dtype": "uint32",
700
  "format": "f32-to-bf16",
@@ -704,8 +704,8 @@
704
  {
705
  "name": "model.layers.14.mlp.down_proj.q_scale",
706
  "shape": [
707
- 48,
708
- 576
709
  ],
710
  "dtype": "float16",
711
  "format": "f32-to-bf16",
@@ -715,8 +715,8 @@
715
  {
716
  "name": "model.layers.14.mlp.gate_up_proj.q_weight",
717
  "shape": [
718
- 72,
719
- 3072
720
  ],
721
  "dtype": "uint32",
722
  "format": "f32-to-bf16",
@@ -726,8 +726,8 @@
726
  {
727
  "name": "model.layers.14.mlp.gate_up_proj.q_scale",
728
  "shape": [
729
- 18,
730
- 3072
731
  ],
732
  "dtype": "float16",
733
  "format": "f32-to-bf16",
@@ -747,8 +747,8 @@
747
  {
748
  "name": "model.layers.14.self_attn.qkv_proj.q_weight",
749
  "shape": [
750
- 72,
751
- 960
752
  ],
753
  "dtype": "uint32",
754
  "format": "f32-to-bf16",
@@ -758,8 +758,8 @@
758
  {
759
  "name": "model.layers.14.self_attn.qkv_proj.q_scale",
760
  "shape": [
761
- 18,
762
- 960
763
  ],
764
  "dtype": "float16",
765
  "format": "f32-to-bf16",
@@ -769,8 +769,8 @@
769
  {
770
  "name": "model.layers.14.self_attn.o_proj.q_weight",
771
  "shape": [
772
- 72,
773
- 576
774
  ],
775
  "dtype": "uint32",
776
  "format": "f32-to-bf16",
@@ -780,8 +780,8 @@
780
  {
781
  "name": "model.layers.14.self_attn.o_proj.q_scale",
782
  "shape": [
783
- 18,
784
- 576
785
  ],
786
  "dtype": "float16",
787
  "format": "f32-to-bf16",
@@ -801,8 +801,8 @@
801
  {
802
  "name": "model.layers.15.mlp.down_proj.q_weight",
803
  "shape": [
804
- 192,
805
- 576
806
  ],
807
  "dtype": "uint32",
808
  "format": "f32-to-bf16",
@@ -812,8 +812,8 @@
812
  {
813
  "name": "model.layers.15.mlp.down_proj.q_scale",
814
  "shape": [
815
- 48,
816
- 576
817
  ],
818
  "dtype": "float16",
819
  "format": "f32-to-bf16",
@@ -823,8 +823,8 @@
823
  {
824
  "name": "model.layers.15.mlp.gate_up_proj.q_weight",
825
  "shape": [
826
- 72,
827
- 3072
828
  ],
829
  "dtype": "uint32",
830
  "format": "f32-to-bf16",
@@ -834,8 +834,8 @@
834
  {
835
  "name": "model.layers.15.mlp.gate_up_proj.q_scale",
836
  "shape": [
837
- 18,
838
- 3072
839
  ],
840
  "dtype": "float16",
841
  "format": "f32-to-bf16",
@@ -855,8 +855,8 @@
855
  {
856
  "name": "model.layers.15.self_attn.qkv_proj.q_weight",
857
  "shape": [
858
- 72,
859
- 960
860
  ],
861
  "dtype": "uint32",
862
  "format": "f32-to-bf16",
@@ -866,8 +866,8 @@
866
  {
867
  "name": "model.layers.15.self_attn.qkv_proj.q_scale",
868
  "shape": [
869
- 18,
870
- 960
871
  ],
872
  "dtype": "float16",
873
  "format": "f32-to-bf16",
@@ -877,8 +877,8 @@
877
  {
878
  "name": "model.layers.15.self_attn.o_proj.q_weight",
879
  "shape": [
880
- 72,
881
- 576
882
  ],
883
  "dtype": "uint32",
884
  "format": "f32-to-bf16",
@@ -888,8 +888,8 @@
888
  {
889
  "name": "model.layers.15.self_attn.o_proj.q_scale",
890
  "shape": [
891
- 18,
892
- 576
893
  ],
894
  "dtype": "float16",
895
  "format": "f32-to-bf16",
@@ -909,8 +909,8 @@
909
  {
910
  "name": "model.layers.16.mlp.down_proj.q_weight",
911
  "shape": [
912
- 192,
913
- 576
914
  ],
915
  "dtype": "uint32",
916
  "format": "f32-to-bf16",
@@ -920,8 +920,8 @@
920
  {
921
  "name": "model.layers.16.mlp.down_proj.q_scale",
922
  "shape": [
923
- 48,
924
- 576
925
  ],
926
  "dtype": "float16",
927
  "format": "f32-to-bf16",
@@ -931,8 +931,8 @@
931
  {
932
  "name": "model.layers.16.mlp.gate_up_proj.q_weight",
933
  "shape": [
934
- 72,
935
- 3072
936
  ],
937
  "dtype": "uint32",
938
  "format": "f32-to-bf16",
@@ -942,8 +942,8 @@
942
  {
943
  "name": "model.layers.16.mlp.gate_up_proj.q_scale",
944
  "shape": [
945
- 18,
946
- 3072
947
  ],
948
  "dtype": "float16",
949
  "format": "f32-to-bf16",
@@ -961,7 +961,7 @@
961
  "byteOffset": 33363396
962
  }
963
  ],
964
- "md5sum": "1a30efc6aee63e0853a4d12881da65b7"
965
  },
966
  {
967
  "dataPath": "params_shard_1.bin",
@@ -971,8 +971,8 @@
971
  {
972
  "name": "model.layers.16.self_attn.qkv_proj.q_weight",
973
  "shape": [
974
- 72,
975
- 960
976
  ],
977
  "dtype": "uint32",
978
  "format": "f32-to-bf16",
@@ -982,8 +982,8 @@
982
  {
983
  "name": "model.layers.16.self_attn.qkv_proj.q_scale",
984
  "shape": [
985
- 18,
986
- 960
987
  ],
988
  "dtype": "float16",
989
  "format": "f32-to-bf16",
@@ -993,8 +993,8 @@
993
  {
994
  "name": "model.layers.16.self_attn.o_proj.q_weight",
995
  "shape": [
996
- 72,
997
- 576
998
  ],
999
  "dtype": "uint32",
1000
  "format": "f32-to-bf16",
@@ -1004,8 +1004,8 @@
1004
  {
1005
  "name": "model.layers.16.self_attn.o_proj.q_scale",
1006
  "shape": [
1007
- 18,
1008
- 576
1009
  ],
1010
  "dtype": "float16",
1011
  "format": "f32-to-bf16",
@@ -1025,8 +1025,8 @@
1025
  {
1026
  "name": "model.layers.17.mlp.down_proj.q_weight",
1027
  "shape": [
1028
- 192,
1029
- 576
1030
  ],
1031
  "dtype": "uint32",
1032
  "format": "f32-to-bf16",
@@ -1036,8 +1036,8 @@
1036
  {
1037
  "name": "model.layers.17.mlp.down_proj.q_scale",
1038
  "shape": [
1039
- 48,
1040
- 576
1041
  ],
1042
  "dtype": "float16",
1043
  "format": "f32-to-bf16",
@@ -1047,8 +1047,8 @@
1047
  {
1048
  "name": "model.layers.17.mlp.gate_up_proj.q_weight",
1049
  "shape": [
1050
- 72,
1051
- 3072
1052
  ],
1053
  "dtype": "uint32",
1054
  "format": "f32-to-bf16",
@@ -1058,8 +1058,8 @@
1058
  {
1059
  "name": "model.layers.17.mlp.gate_up_proj.q_scale",
1060
  "shape": [
1061
- 18,
1062
- 3072
1063
  ],
1064
  "dtype": "float16",
1065
  "format": "f32-to-bf16",
@@ -1079,8 +1079,8 @@
1079
  {
1080
  "name": "model.layers.17.self_attn.qkv_proj.q_weight",
1081
  "shape": [
1082
- 72,
1083
- 960
1084
  ],
1085
  "dtype": "uint32",
1086
  "format": "f32-to-bf16",
@@ -1090,8 +1090,8 @@
1090
  {
1091
  "name": "model.layers.17.self_attn.qkv_proj.q_scale",
1092
  "shape": [
1093
- 18,
1094
- 960
1095
  ],
1096
  "dtype": "float16",
1097
  "format": "f32-to-bf16",
@@ -1101,8 +1101,8 @@
1101
  {
1102
  "name": "model.layers.17.self_attn.o_proj.q_weight",
1103
  "shape": [
1104
- 72,
1105
- 576
1106
  ],
1107
  "dtype": "uint32",
1108
  "format": "f32-to-bf16",
@@ -1112,8 +1112,8 @@
1112
  {
1113
  "name": "model.layers.17.self_attn.o_proj.q_scale",
1114
  "shape": [
1115
- 18,
1116
- 576
1117
  ],
1118
  "dtype": "float16",
1119
  "format": "f32-to-bf16",
@@ -1133,8 +1133,8 @@
1133
  {
1134
  "name": "model.layers.18.mlp.down_proj.q_weight",
1135
  "shape": [
1136
- 192,
1137
- 576
1138
  ],
1139
  "dtype": "uint32",
1140
  "format": "f32-to-bf16",
@@ -1144,8 +1144,8 @@
1144
  {
1145
  "name": "model.layers.18.mlp.down_proj.q_scale",
1146
  "shape": [
1147
- 48,
1148
- 576
1149
  ],
1150
  "dtype": "float16",
1151
  "format": "f32-to-bf16",
@@ -1155,8 +1155,8 @@
1155
  {
1156
  "name": "model.layers.18.mlp.gate_up_proj.q_weight",
1157
  "shape": [
1158
- 72,
1159
- 3072
1160
  ],
1161
  "dtype": "uint32",
1162
  "format": "f32-to-bf16",
@@ -1166,8 +1166,8 @@
1166
  {
1167
  "name": "model.layers.18.mlp.gate_up_proj.q_scale",
1168
  "shape": [
1169
- 18,
1170
- 3072
1171
  ],
1172
  "dtype": "float16",
1173
  "format": "f32-to-bf16",
@@ -1187,8 +1187,8 @@
1187
  {
1188
  "name": "model.layers.18.self_attn.qkv_proj.q_weight",
1189
  "shape": [
1190
- 72,
1191
- 960
1192
  ],
1193
  "dtype": "uint32",
1194
  "format": "f32-to-bf16",
@@ -1198,8 +1198,8 @@
1198
  {
1199
  "name": "model.layers.18.self_attn.qkv_proj.q_scale",
1200
  "shape": [
1201
- 18,
1202
- 960
1203
  ],
1204
  "dtype": "float16",
1205
  "format": "f32-to-bf16",
@@ -1209,8 +1209,8 @@
1209
  {
1210
  "name": "model.layers.18.self_attn.o_proj.q_weight",
1211
  "shape": [
1212
- 72,
1213
- 576
1214
  ],
1215
  "dtype": "uint32",
1216
  "format": "f32-to-bf16",
@@ -1220,8 +1220,8 @@
1220
  {
1221
  "name": "model.layers.18.self_attn.o_proj.q_scale",
1222
  "shape": [
1223
- 18,
1224
- 576
1225
  ],
1226
  "dtype": "float16",
1227
  "format": "f32-to-bf16",
@@ -1241,8 +1241,8 @@
1241
  {
1242
  "name": "model.layers.19.mlp.down_proj.q_weight",
1243
  "shape": [
1244
- 192,
1245
- 576
1246
  ],
1247
  "dtype": "uint32",
1248
  "format": "f32-to-bf16",
@@ -1252,8 +1252,8 @@
1252
  {
1253
  "name": "model.layers.19.mlp.down_proj.q_scale",
1254
  "shape": [
1255
- 48,
1256
- 576
1257
  ],
1258
  "dtype": "float16",
1259
  "format": "f32-to-bf16",
@@ -1263,8 +1263,8 @@
1263
  {
1264
  "name": "model.layers.19.mlp.gate_up_proj.q_weight",
1265
  "shape": [
1266
- 72,
1267
- 3072
1268
  ],
1269
  "dtype": "uint32",
1270
  "format": "f32-to-bf16",
@@ -1274,8 +1274,8 @@
1274
  {
1275
  "name": "model.layers.19.mlp.gate_up_proj.q_scale",
1276
  "shape": [
1277
- 18,
1278
- 3072
1279
  ],
1280
  "dtype": "float16",
1281
  "format": "f32-to-bf16",
@@ -1295,8 +1295,8 @@
1295
  {
1296
  "name": "model.layers.19.self_attn.qkv_proj.q_weight",
1297
  "shape": [
1298
- 72,
1299
- 960
1300
  ],
1301
  "dtype": "uint32",
1302
  "format": "f32-to-bf16",
@@ -1306,8 +1306,8 @@
1306
  {
1307
  "name": "model.layers.19.self_attn.qkv_proj.q_scale",
1308
  "shape": [
1309
- 18,
1310
- 960
1311
  ],
1312
  "dtype": "float16",
1313
  "format": "f32-to-bf16",
@@ -1317,8 +1317,8 @@
1317
  {
1318
  "name": "model.layers.19.self_attn.o_proj.q_weight",
1319
  "shape": [
1320
- 72,
1321
- 576
1322
  ],
1323
  "dtype": "uint32",
1324
  "format": "f32-to-bf16",
@@ -1328,8 +1328,8 @@
1328
  {
1329
  "name": "model.layers.19.self_attn.o_proj.q_scale",
1330
  "shape": [
1331
- 18,
1332
- 576
1333
  ],
1334
  "dtype": "float16",
1335
  "format": "f32-to-bf16",
@@ -1349,8 +1349,8 @@
1349
  {
1350
  "name": "model.layers.2.mlp.down_proj.q_weight",
1351
  "shape": [
1352
- 192,
1353
- 576
1354
  ],
1355
  "dtype": "uint32",
1356
  "format": "f32-to-bf16",
@@ -1360,8 +1360,8 @@
1360
  {
1361
  "name": "model.layers.2.mlp.down_proj.q_scale",
1362
  "shape": [
1363
- 48,
1364
- 576
1365
  ],
1366
  "dtype": "float16",
1367
  "format": "f32-to-bf16",
@@ -1371,8 +1371,8 @@
1371
  {
1372
  "name": "model.layers.2.mlp.gate_up_proj.q_weight",
1373
  "shape": [
1374
- 72,
1375
- 3072
1376
  ],
1377
  "dtype": "uint32",
1378
  "format": "f32-to-bf16",
@@ -1382,8 +1382,8 @@
1382
  {
1383
  "name": "model.layers.2.mlp.gate_up_proj.q_scale",
1384
  "shape": [
1385
- 18,
1386
- 3072
1387
  ],
1388
  "dtype": "float16",
1389
  "format": "f32-to-bf16",
@@ -1403,8 +1403,8 @@
1403
  {
1404
  "name": "model.layers.2.self_attn.qkv_proj.q_weight",
1405
  "shape": [
1406
- 72,
1407
- 960
1408
  ],
1409
  "dtype": "uint32",
1410
  "format": "f32-to-bf16",
@@ -1414,8 +1414,8 @@
1414
  {
1415
  "name": "model.layers.2.self_attn.qkv_proj.q_scale",
1416
  "shape": [
1417
- 18,
1418
- 960
1419
  ],
1420
  "dtype": "float16",
1421
  "format": "f32-to-bf16",
@@ -1425,8 +1425,8 @@
1425
  {
1426
  "name": "model.layers.2.self_attn.o_proj.q_weight",
1427
  "shape": [
1428
- 72,
1429
- 576
1430
  ],
1431
  "dtype": "uint32",
1432
  "format": "f32-to-bf16",
@@ -1436,8 +1436,8 @@
1436
  {
1437
  "name": "model.layers.2.self_attn.o_proj.q_scale",
1438
  "shape": [
1439
- 18,
1440
- 576
1441
  ],
1442
  "dtype": "float16",
1443
  "format": "f32-to-bf16",
@@ -1457,8 +1457,8 @@
1457
  {
1458
  "name": "model.layers.20.mlp.down_proj.q_weight",
1459
  "shape": [
1460
- 192,
1461
- 576
1462
  ],
1463
  "dtype": "uint32",
1464
  "format": "f32-to-bf16",
@@ -1468,8 +1468,8 @@
1468
  {
1469
  "name": "model.layers.20.mlp.down_proj.q_scale",
1470
  "shape": [
1471
- 48,
1472
- 576
1473
  ],
1474
  "dtype": "float16",
1475
  "format": "f32-to-bf16",
@@ -1479,8 +1479,8 @@
1479
  {
1480
  "name": "model.layers.20.mlp.gate_up_proj.q_weight",
1481
  "shape": [
1482
- 72,
1483
- 3072
1484
  ],
1485
  "dtype": "uint32",
1486
  "format": "f32-to-bf16",
@@ -1490,8 +1490,8 @@
1490
  {
1491
  "name": "model.layers.20.mlp.gate_up_proj.q_scale",
1492
  "shape": [
1493
- 18,
1494
- 3072
1495
  ],
1496
  "dtype": "float16",
1497
  "format": "f32-to-bf16",
@@ -1511,8 +1511,8 @@
1511
  {
1512
  "name": "model.layers.20.self_attn.qkv_proj.q_weight",
1513
  "shape": [
1514
- 72,
1515
- 960
1516
  ],
1517
  "dtype": "uint32",
1518
  "format": "f32-to-bf16",
@@ -1522,8 +1522,8 @@
1522
  {
1523
  "name": "model.layers.20.self_attn.qkv_proj.q_scale",
1524
  "shape": [
1525
- 18,
1526
- 960
1527
  ],
1528
  "dtype": "float16",
1529
  "format": "f32-to-bf16",
@@ -1533,8 +1533,8 @@
1533
  {
1534
  "name": "model.layers.20.self_attn.o_proj.q_weight",
1535
  "shape": [
1536
- 72,
1537
- 576
1538
  ],
1539
  "dtype": "uint32",
1540
  "format": "f32-to-bf16",
@@ -1544,8 +1544,8 @@
1544
  {
1545
  "name": "model.layers.20.self_attn.o_proj.q_scale",
1546
  "shape": [
1547
- 18,
1548
- 576
1549
  ],
1550
  "dtype": "float16",
1551
  "format": "f32-to-bf16",
@@ -1565,8 +1565,8 @@
1565
  {
1566
  "name": "model.layers.21.mlp.down_proj.q_weight",
1567
  "shape": [
1568
- 192,
1569
- 576
1570
  ],
1571
  "dtype": "uint32",
1572
  "format": "f32-to-bf16",
@@ -1576,8 +1576,8 @@
1576
  {
1577
  "name": "model.layers.21.mlp.down_proj.q_scale",
1578
  "shape": [
1579
- 48,
1580
- 576
1581
  ],
1582
  "dtype": "float16",
1583
  "format": "f32-to-bf16",
@@ -1587,8 +1587,8 @@
1587
  {
1588
  "name": "model.layers.21.mlp.gate_up_proj.q_weight",
1589
  "shape": [
1590
- 72,
1591
- 3072
1592
  ],
1593
  "dtype": "uint32",
1594
  "format": "f32-to-bf16",
@@ -1598,8 +1598,8 @@
1598
  {
1599
  "name": "model.layers.21.mlp.gate_up_proj.q_scale",
1600
  "shape": [
1601
- 18,
1602
- 3072
1603
  ],
1604
  "dtype": "float16",
1605
  "format": "f32-to-bf16",
@@ -1619,8 +1619,8 @@
1619
  {
1620
  "name": "model.layers.21.self_attn.qkv_proj.q_weight",
1621
  "shape": [
1622
- 72,
1623
- 960
1624
  ],
1625
  "dtype": "uint32",
1626
  "format": "f32-to-bf16",
@@ -1630,8 +1630,8 @@
1630
  {
1631
  "name": "model.layers.21.self_attn.qkv_proj.q_scale",
1632
  "shape": [
1633
- 18,
1634
- 960
1635
  ],
1636
  "dtype": "float16",
1637
  "format": "f32-to-bf16",
@@ -1641,8 +1641,8 @@
1641
  {
1642
  "name": "model.layers.21.self_attn.o_proj.q_weight",
1643
  "shape": [
1644
- 72,
1645
- 576
1646
  ],
1647
  "dtype": "uint32",
1648
  "format": "f32-to-bf16",
@@ -1652,8 +1652,8 @@
1652
  {
1653
  "name": "model.layers.21.self_attn.o_proj.q_scale",
1654
  "shape": [
1655
- 18,
1656
- 576
1657
  ],
1658
  "dtype": "float16",
1659
  "format": "f32-to-bf16",
@@ -1673,8 +1673,8 @@
1673
  {
1674
  "name": "model.layers.22.mlp.down_proj.q_weight",
1675
  "shape": [
1676
- 192,
1677
- 576
1678
  ],
1679
  "dtype": "uint32",
1680
  "format": "f32-to-bf16",
@@ -1684,8 +1684,8 @@
1684
  {
1685
  "name": "model.layers.22.mlp.down_proj.q_scale",
1686
  "shape": [
1687
- 48,
1688
- 576
1689
  ],
1690
  "dtype": "float16",
1691
  "format": "f32-to-bf16",
@@ -1695,8 +1695,8 @@
1695
  {
1696
  "name": "model.layers.22.mlp.gate_up_proj.q_weight",
1697
  "shape": [
1698
- 72,
1699
- 3072
1700
  ],
1701
  "dtype": "uint32",
1702
  "format": "f32-to-bf16",
@@ -1706,8 +1706,8 @@
1706
  {
1707
  "name": "model.layers.22.mlp.gate_up_proj.q_scale",
1708
  "shape": [
1709
- 18,
1710
- 3072
1711
  ],
1712
  "dtype": "float16",
1713
  "format": "f32-to-bf16",
@@ -1727,8 +1727,8 @@
1727
  {
1728
  "name": "model.layers.22.self_attn.qkv_proj.q_weight",
1729
  "shape": [
1730
- 72,
1731
- 960
1732
  ],
1733
  "dtype": "uint32",
1734
  "format": "f32-to-bf16",
@@ -1738,8 +1738,8 @@
1738
  {
1739
  "name": "model.layers.22.self_attn.qkv_proj.q_scale",
1740
  "shape": [
1741
- 18,
1742
- 960
1743
  ],
1744
  "dtype": "float16",
1745
  "format": "f32-to-bf16",
@@ -1749,8 +1749,8 @@
1749
  {
1750
  "name": "model.layers.22.self_attn.o_proj.q_weight",
1751
  "shape": [
1752
- 72,
1753
- 576
1754
  ],
1755
  "dtype": "uint32",
1756
  "format": "f32-to-bf16",
@@ -1760,8 +1760,8 @@
1760
  {
1761
  "name": "model.layers.22.self_attn.o_proj.q_scale",
1762
  "shape": [
1763
- 18,
1764
- 576
1765
  ],
1766
  "dtype": "float16",
1767
  "format": "f32-to-bf16",
@@ -1781,8 +1781,8 @@
1781
  {
1782
  "name": "model.layers.23.mlp.down_proj.q_weight",
1783
  "shape": [
1784
- 192,
1785
- 576
1786
  ],
1787
  "dtype": "uint32",
1788
  "format": "f32-to-bf16",
@@ -1792,8 +1792,8 @@
1792
  {
1793
  "name": "model.layers.23.mlp.down_proj.q_scale",
1794
  "shape": [
1795
- 48,
1796
- 576
1797
  ],
1798
  "dtype": "float16",
1799
  "format": "f32-to-bf16",
@@ -1803,8 +1803,8 @@
1803
  {
1804
  "name": "model.layers.23.mlp.gate_up_proj.q_weight",
1805
  "shape": [
1806
- 72,
1807
- 3072
1808
  ],
1809
  "dtype": "uint32",
1810
  "format": "f32-to-bf16",
@@ -1814,8 +1814,8 @@
1814
  {
1815
  "name": "model.layers.23.mlp.gate_up_proj.q_scale",
1816
  "shape": [
1817
- 18,
1818
- 3072
1819
  ],
1820
  "dtype": "float16",
1821
  "format": "f32-to-bf16",
@@ -1835,8 +1835,8 @@
1835
  {
1836
  "name": "model.layers.23.self_attn.qkv_proj.q_weight",
1837
  "shape": [
1838
- 72,
1839
- 960
1840
  ],
1841
  "dtype": "uint32",
1842
  "format": "f32-to-bf16",
@@ -1846,8 +1846,8 @@
1846
  {
1847
  "name": "model.layers.23.self_attn.qkv_proj.q_scale",
1848
  "shape": [
1849
- 18,
1850
- 960
1851
  ],
1852
  "dtype": "float16",
1853
  "format": "f32-to-bf16",
@@ -1857,8 +1857,8 @@
1857
  {
1858
  "name": "model.layers.23.self_attn.o_proj.q_weight",
1859
  "shape": [
1860
- 72,
1861
- 576
1862
  ],
1863
  "dtype": "uint32",
1864
  "format": "f32-to-bf16",
@@ -1868,8 +1868,8 @@
1868
  {
1869
  "name": "model.layers.23.self_attn.o_proj.q_scale",
1870
  "shape": [
1871
- 18,
1872
- 576
1873
  ],
1874
  "dtype": "float16",
1875
  "format": "f32-to-bf16",
@@ -1889,8 +1889,8 @@
1889
  {
1890
  "name": "model.layers.24.mlp.down_proj.q_weight",
1891
  "shape": [
1892
- 192,
1893
- 576
1894
  ],
1895
  "dtype": "uint32",
1896
  "format": "f32-to-bf16",
@@ -1900,8 +1900,8 @@
1900
  {
1901
  "name": "model.layers.24.mlp.down_proj.q_scale",
1902
  "shape": [
1903
- 48,
1904
- 576
1905
  ],
1906
  "dtype": "float16",
1907
  "format": "f32-to-bf16",
@@ -1911,8 +1911,8 @@
1911
  {
1912
  "name": "model.layers.24.mlp.gate_up_proj.q_weight",
1913
  "shape": [
1914
- 72,
1915
- 3072
1916
  ],
1917
  "dtype": "uint32",
1918
  "format": "f32-to-bf16",
@@ -1922,8 +1922,8 @@
1922
  {
1923
  "name": "model.layers.24.mlp.gate_up_proj.q_scale",
1924
  "shape": [
1925
- 18,
1926
- 3072
1927
  ],
1928
  "dtype": "float16",
1929
  "format": "f32-to-bf16",
@@ -1943,8 +1943,8 @@
1943
  {
1944
  "name": "model.layers.24.self_attn.qkv_proj.q_weight",
1945
  "shape": [
1946
- 72,
1947
- 960
1948
  ],
1949
  "dtype": "uint32",
1950
  "format": "f32-to-bf16",
@@ -1954,8 +1954,8 @@
1954
  {
1955
  "name": "model.layers.24.self_attn.qkv_proj.q_scale",
1956
  "shape": [
1957
- 18,
1958
- 960
1959
  ],
1960
  "dtype": "float16",
1961
  "format": "f32-to-bf16",
@@ -1965,8 +1965,8 @@
1965
  {
1966
  "name": "model.layers.24.self_attn.o_proj.q_weight",
1967
  "shape": [
1968
- 72,
1969
- 576
1970
  ],
1971
  "dtype": "uint32",
1972
  "format": "f32-to-bf16",
@@ -1976,8 +1976,8 @@
1976
  {
1977
  "name": "model.layers.24.self_attn.o_proj.q_scale",
1978
  "shape": [
1979
- 18,
1980
- 576
1981
  ],
1982
  "dtype": "float16",
1983
  "format": "f32-to-bf16",
@@ -1997,8 +1997,8 @@
1997
  {
1998
  "name": "model.layers.25.mlp.down_proj.q_weight",
1999
  "shape": [
2000
- 192,
2001
- 576
2002
  ],
2003
  "dtype": "uint32",
2004
  "format": "f32-to-bf16",
@@ -2008,8 +2008,8 @@
2008
  {
2009
  "name": "model.layers.25.mlp.down_proj.q_scale",
2010
  "shape": [
2011
- 48,
2012
- 576
2013
  ],
2014
  "dtype": "float16",
2015
  "format": "f32-to-bf16",
@@ -2019,8 +2019,8 @@
2019
  {
2020
  "name": "model.layers.25.mlp.gate_up_proj.q_weight",
2021
  "shape": [
2022
- 72,
2023
- 3072
2024
  ],
2025
  "dtype": "uint32",
2026
  "format": "f32-to-bf16",
@@ -2030,8 +2030,8 @@
2030
  {
2031
  "name": "model.layers.25.mlp.gate_up_proj.q_scale",
2032
  "shape": [
2033
- 18,
2034
- 3072
2035
  ],
2036
  "dtype": "float16",
2037
  "format": "f32-to-bf16",
@@ -2051,8 +2051,8 @@
2051
  {
2052
  "name": "model.layers.25.self_attn.qkv_proj.q_weight",
2053
  "shape": [
2054
- 72,
2055
- 960
2056
  ],
2057
  "dtype": "uint32",
2058
  "format": "f32-to-bf16",
@@ -2062,8 +2062,8 @@
2062
  {
2063
  "name": "model.layers.25.self_attn.qkv_proj.q_scale",
2064
  "shape": [
2065
- 18,
2066
- 960
2067
  ],
2068
  "dtype": "float16",
2069
  "format": "f32-to-bf16",
@@ -2073,8 +2073,8 @@
2073
  {
2074
  "name": "model.layers.25.self_attn.o_proj.q_weight",
2075
  "shape": [
2076
- 72,
2077
- 576
2078
  ],
2079
  "dtype": "uint32",
2080
  "format": "f32-to-bf16",
@@ -2084,8 +2084,8 @@
2084
  {
2085
  "name": "model.layers.25.self_attn.o_proj.q_scale",
2086
  "shape": [
2087
- 18,
2088
- 576
2089
  ],
2090
  "dtype": "float16",
2091
  "format": "f32-to-bf16",
@@ -2105,8 +2105,8 @@
2105
  {
2106
  "name": "model.layers.26.mlp.down_proj.q_weight",
2107
  "shape": [
2108
- 192,
2109
- 576
2110
  ],
2111
  "dtype": "uint32",
2112
  "format": "f32-to-bf16",
@@ -2116,8 +2116,8 @@
2116
  {
2117
  "name": "model.layers.26.mlp.down_proj.q_scale",
2118
  "shape": [
2119
- 48,
2120
- 576
2121
  ],
2122
  "dtype": "float16",
2123
  "format": "f32-to-bf16",
@@ -2127,8 +2127,8 @@
2127
  {
2128
  "name": "model.layers.26.mlp.gate_up_proj.q_weight",
2129
  "shape": [
2130
- 72,
2131
- 3072
2132
  ],
2133
  "dtype": "uint32",
2134
  "format": "f32-to-bf16",
@@ -2138,8 +2138,8 @@
2138
  {
2139
  "name": "model.layers.26.mlp.gate_up_proj.q_scale",
2140
  "shape": [
2141
- 18,
2142
- 3072
2143
  ],
2144
  "dtype": "float16",
2145
  "format": "f32-to-bf16",
@@ -2159,8 +2159,8 @@
2159
  {
2160
  "name": "model.layers.26.self_attn.qkv_proj.q_weight",
2161
  "shape": [
2162
- 72,
2163
- 960
2164
  ],
2165
  "dtype": "uint32",
2166
  "format": "f32-to-bf16",
@@ -2170,8 +2170,8 @@
2170
  {
2171
  "name": "model.layers.26.self_attn.qkv_proj.q_scale",
2172
  "shape": [
2173
- 18,
2174
- 960
2175
  ],
2176
  "dtype": "float16",
2177
  "format": "f32-to-bf16",
@@ -2181,8 +2181,8 @@
2181
  {
2182
  "name": "model.layers.26.self_attn.o_proj.q_weight",
2183
  "shape": [
2184
- 72,
2185
- 576
2186
  ],
2187
  "dtype": "uint32",
2188
  "format": "f32-to-bf16",
@@ -2192,8 +2192,8 @@
2192
  {
2193
  "name": "model.layers.26.self_attn.o_proj.q_scale",
2194
  "shape": [
2195
- 18,
2196
- 576
2197
  ],
2198
  "dtype": "float16",
2199
  "format": "f32-to-bf16",
@@ -2213,8 +2213,8 @@
2213
  {
2214
  "name": "model.layers.27.mlp.down_proj.q_weight",
2215
  "shape": [
2216
- 192,
2217
- 576
2218
  ],
2219
  "dtype": "uint32",
2220
  "format": "f32-to-bf16",
@@ -2224,8 +2224,8 @@
2224
  {
2225
  "name": "model.layers.27.mlp.down_proj.q_scale",
2226
  "shape": [
2227
- 48,
2228
- 576
2229
  ],
2230
  "dtype": "float16",
2231
  "format": "f32-to-bf16",
@@ -2235,8 +2235,8 @@
2235
  {
2236
  "name": "model.layers.27.mlp.gate_up_proj.q_weight",
2237
  "shape": [
2238
- 72,
2239
- 3072
2240
  ],
2241
  "dtype": "uint32",
2242
  "format": "f32-to-bf16",
@@ -2246,8 +2246,8 @@
2246
  {
2247
  "name": "model.layers.27.mlp.gate_up_proj.q_scale",
2248
  "shape": [
2249
- 18,
2250
- 3072
2251
  ],
2252
  "dtype": "float16",
2253
  "format": "f32-to-bf16",
@@ -2267,8 +2267,8 @@
2267
  {
2268
  "name": "model.layers.27.self_attn.qkv_proj.q_weight",
2269
  "shape": [
2270
- 72,
2271
- 960
2272
  ],
2273
  "dtype": "uint32",
2274
  "format": "f32-to-bf16",
@@ -2278,8 +2278,8 @@
2278
  {
2279
  "name": "model.layers.27.self_attn.qkv_proj.q_scale",
2280
  "shape": [
2281
- 18,
2282
- 960
2283
  ],
2284
  "dtype": "float16",
2285
  "format": "f32-to-bf16",
@@ -2289,8 +2289,8 @@
2289
  {
2290
  "name": "model.layers.27.self_attn.o_proj.q_weight",
2291
  "shape": [
2292
- 72,
2293
- 576
2294
  ],
2295
  "dtype": "uint32",
2296
  "format": "f32-to-bf16",
@@ -2300,8 +2300,8 @@
2300
  {
2301
  "name": "model.layers.27.self_attn.o_proj.q_scale",
2302
  "shape": [
2303
- 18,
2304
- 576
2305
  ],
2306
  "dtype": "float16",
2307
  "format": "f32-to-bf16",
@@ -2321,8 +2321,8 @@
2321
  {
2322
  "name": "model.layers.28.mlp.down_proj.q_weight",
2323
  "shape": [
2324
- 192,
2325
- 576
2326
  ],
2327
  "dtype": "uint32",
2328
  "format": "f32-to-bf16",
@@ -2332,8 +2332,8 @@
2332
  {
2333
  "name": "model.layers.28.mlp.down_proj.q_scale",
2334
  "shape": [
2335
- 48,
2336
- 576
2337
  ],
2338
  "dtype": "float16",
2339
  "format": "f32-to-bf16",
@@ -2343,8 +2343,8 @@
2343
  {
2344
  "name": "model.layers.28.mlp.gate_up_proj.q_weight",
2345
  "shape": [
2346
- 72,
2347
- 3072
2348
  ],
2349
  "dtype": "uint32",
2350
  "format": "f32-to-bf16",
@@ -2354,8 +2354,8 @@
2354
  {
2355
  "name": "model.layers.28.mlp.gate_up_proj.q_scale",
2356
  "shape": [
2357
- 18,
2358
- 3072
2359
  ],
2360
  "dtype": "float16",
2361
  "format": "f32-to-bf16",
@@ -2375,8 +2375,8 @@
2375
  {
2376
  "name": "model.layers.28.self_attn.qkv_proj.q_weight",
2377
  "shape": [
2378
- 72,
2379
- 960
2380
  ],
2381
  "dtype": "uint32",
2382
  "format": "f32-to-bf16",
@@ -2386,8 +2386,8 @@
2386
  {
2387
  "name": "model.layers.28.self_attn.qkv_proj.q_scale",
2388
  "shape": [
2389
- 18,
2390
- 960
2391
  ],
2392
  "dtype": "float16",
2393
  "format": "f32-to-bf16",
@@ -2397,8 +2397,8 @@
2397
  {
2398
  "name": "model.layers.28.self_attn.o_proj.q_weight",
2399
  "shape": [
2400
- 72,
2401
- 576
2402
  ],
2403
  "dtype": "uint32",
2404
  "format": "f32-to-bf16",
@@ -2408,8 +2408,8 @@
2408
  {
2409
  "name": "model.layers.28.self_attn.o_proj.q_scale",
2410
  "shape": [
2411
- 18,
2412
- 576
2413
  ],
2414
  "dtype": "float16",
2415
  "format": "f32-to-bf16",
@@ -2429,8 +2429,8 @@
2429
  {
2430
  "name": "model.layers.29.mlp.down_proj.q_weight",
2431
  "shape": [
2432
- 192,
2433
- 576
2434
  ],
2435
  "dtype": "uint32",
2436
  "format": "f32-to-bf16",
@@ -2440,8 +2440,8 @@
2440
  {
2441
  "name": "model.layers.29.mlp.down_proj.q_scale",
2442
  "shape": [
2443
- 48,
2444
- 576
2445
  ],
2446
  "dtype": "float16",
2447
  "format": "f32-to-bf16",
@@ -2451,8 +2451,8 @@
2451
  {
2452
  "name": "model.layers.29.mlp.gate_up_proj.q_weight",
2453
  "shape": [
2454
- 72,
2455
- 3072
2456
  ],
2457
  "dtype": "uint32",
2458
  "format": "f32-to-bf16",
@@ -2462,8 +2462,8 @@
2462
  {
2463
  "name": "model.layers.29.mlp.gate_up_proj.q_scale",
2464
  "shape": [
2465
- 18,
2466
- 3072
2467
  ],
2468
  "dtype": "float16",
2469
  "format": "f32-to-bf16",
@@ -2483,8 +2483,8 @@
2483
  {
2484
  "name": "model.layers.29.self_attn.qkv_proj.q_weight",
2485
  "shape": [
2486
- 72,
2487
- 960
2488
  ],
2489
  "dtype": "uint32",
2490
  "format": "f32-to-bf16",
@@ -2494,8 +2494,8 @@
2494
  {
2495
  "name": "model.layers.29.self_attn.qkv_proj.q_scale",
2496
  "shape": [
2497
- 18,
2498
- 960
2499
  ],
2500
  "dtype": "float16",
2501
  "format": "f32-to-bf16",
@@ -2505,8 +2505,8 @@
2505
  {
2506
  "name": "model.layers.29.self_attn.o_proj.q_weight",
2507
  "shape": [
2508
- 72,
2509
- 576
2510
  ],
2511
  "dtype": "uint32",
2512
  "format": "f32-to-bf16",
@@ -2516,8 +2516,8 @@
2516
  {
2517
  "name": "model.layers.29.self_attn.o_proj.q_scale",
2518
  "shape": [
2519
- 18,
2520
- 576
2521
  ],
2522
  "dtype": "float16",
2523
  "format": "f32-to-bf16",
@@ -2537,8 +2537,8 @@
2537
  {
2538
  "name": "model.layers.3.mlp.down_proj.q_weight",
2539
  "shape": [
2540
- 192,
2541
- 576
2542
  ],
2543
  "dtype": "uint32",
2544
  "format": "f32-to-bf16",
@@ -2548,8 +2548,8 @@
2548
  {
2549
  "name": "model.layers.3.mlp.down_proj.q_scale",
2550
  "shape": [
2551
- 48,
2552
- 576
2553
  ],
2554
  "dtype": "float16",
2555
  "format": "f32-to-bf16",
@@ -2559,8 +2559,8 @@
2559
  {
2560
  "name": "model.layers.3.mlp.gate_up_proj.q_weight",
2561
  "shape": [
2562
- 72,
2563
- 3072
2564
  ],
2565
  "dtype": "uint32",
2566
  "format": "f32-to-bf16",
@@ -2570,8 +2570,8 @@
2570
  {
2571
  "name": "model.layers.3.mlp.gate_up_proj.q_scale",
2572
  "shape": [
2573
- 18,
2574
- 3072
2575
  ],
2576
  "dtype": "float16",
2577
  "format": "f32-to-bf16",
@@ -2591,8 +2591,8 @@
2591
  {
2592
  "name": "model.layers.3.self_attn.qkv_proj.q_weight",
2593
  "shape": [
2594
- 72,
2595
- 960
2596
  ],
2597
  "dtype": "uint32",
2598
  "format": "f32-to-bf16",
@@ -2602,8 +2602,8 @@
2602
  {
2603
  "name": "model.layers.3.self_attn.qkv_proj.q_scale",
2604
  "shape": [
2605
- 18,
2606
- 960
2607
  ],
2608
  "dtype": "float16",
2609
  "format": "f32-to-bf16",
@@ -2613,8 +2613,8 @@
2613
  {
2614
  "name": "model.layers.3.self_attn.o_proj.q_weight",
2615
  "shape": [
2616
- 72,
2617
- 576
2618
  ],
2619
  "dtype": "uint32",
2620
  "format": "f32-to-bf16",
@@ -2624,8 +2624,8 @@
2624
  {
2625
  "name": "model.layers.3.self_attn.o_proj.q_scale",
2626
  "shape": [
2627
- 18,
2628
- 576
2629
  ],
2630
  "dtype": "float16",
2631
  "format": "f32-to-bf16",
@@ -2645,8 +2645,8 @@
2645
  {
2646
  "name": "model.layers.4.mlp.down_proj.q_weight",
2647
  "shape": [
2648
- 192,
2649
- 576
2650
  ],
2651
  "dtype": "uint32",
2652
  "format": "f32-to-bf16",
@@ -2656,8 +2656,8 @@
2656
  {
2657
  "name": "model.layers.4.mlp.down_proj.q_scale",
2658
  "shape": [
2659
- 48,
2660
- 576
2661
  ],
2662
  "dtype": "float16",
2663
  "format": "f32-to-bf16",
@@ -2667,8 +2667,8 @@
2667
  {
2668
  "name": "model.layers.4.mlp.gate_up_proj.q_weight",
2669
  "shape": [
2670
- 72,
2671
- 3072
2672
  ],
2673
  "dtype": "uint32",
2674
  "format": "f32-to-bf16",
@@ -2678,8 +2678,8 @@
2678
  {
2679
  "name": "model.layers.4.mlp.gate_up_proj.q_scale",
2680
  "shape": [
2681
- 18,
2682
- 3072
2683
  ],
2684
  "dtype": "float16",
2685
  "format": "f32-to-bf16",
@@ -2699,8 +2699,8 @@
2699
  {
2700
  "name": "model.layers.4.self_attn.qkv_proj.q_weight",
2701
  "shape": [
2702
- 72,
2703
- 960
2704
  ],
2705
  "dtype": "uint32",
2706
  "format": "f32-to-bf16",
@@ -2710,8 +2710,8 @@
2710
  {
2711
  "name": "model.layers.4.self_attn.qkv_proj.q_scale",
2712
  "shape": [
2713
- 18,
2714
- 960
2715
  ],
2716
  "dtype": "float16",
2717
  "format": "f32-to-bf16",
@@ -2721,8 +2721,8 @@
2721
  {
2722
  "name": "model.layers.4.self_attn.o_proj.q_weight",
2723
  "shape": [
2724
- 72,
2725
- 576
2726
  ],
2727
  "dtype": "uint32",
2728
  "format": "f32-to-bf16",
@@ -2732,8 +2732,8 @@
2732
  {
2733
  "name": "model.layers.4.self_attn.o_proj.q_scale",
2734
  "shape": [
2735
- 18,
2736
- 576
2737
  ],
2738
  "dtype": "float16",
2739
  "format": "f32-to-bf16",
@@ -2753,8 +2753,8 @@
2753
  {
2754
  "name": "model.layers.5.mlp.down_proj.q_weight",
2755
  "shape": [
2756
- 192,
2757
- 576
2758
  ],
2759
  "dtype": "uint32",
2760
  "format": "f32-to-bf16",
@@ -2764,8 +2764,8 @@
2764
  {
2765
  "name": "model.layers.5.mlp.down_proj.q_scale",
2766
  "shape": [
2767
- 48,
2768
- 576
2769
  ],
2770
  "dtype": "float16",
2771
  "format": "f32-to-bf16",
@@ -2773,7 +2773,7 @@
2773
  "byteOffset": 32828544
2774
  }
2775
  ],
2776
- "md5sum": "1ac207801557d776470d99e7302f9223"
2777
  },
2778
  {
2779
  "dataPath": "params_shard_2.bin",
@@ -2783,8 +2783,8 @@
2783
  {
2784
  "name": "model.layers.5.mlp.gate_up_proj.q_weight",
2785
  "shape": [
2786
- 72,
2787
- 3072
2788
  ],
2789
  "dtype": "uint32",
2790
  "format": "f32-to-bf16",
@@ -2794,8 +2794,8 @@
2794
  {
2795
  "name": "model.layers.5.mlp.gate_up_proj.q_scale",
2796
  "shape": [
2797
- 18,
2798
- 3072
2799
  ],
2800
  "dtype": "float16",
2801
  "format": "f32-to-bf16",
@@ -2815,8 +2815,8 @@
2815
  {
2816
  "name": "model.layers.5.self_attn.qkv_proj.q_weight",
2817
  "shape": [
2818
- 72,
2819
- 960
2820
  ],
2821
  "dtype": "uint32",
2822
  "format": "f32-to-bf16",
@@ -2826,8 +2826,8 @@
2826
  {
2827
  "name": "model.layers.5.self_attn.qkv_proj.q_scale",
2828
  "shape": [
2829
- 18,
2830
- 960
2831
  ],
2832
  "dtype": "float16",
2833
  "format": "f32-to-bf16",
@@ -2837,8 +2837,8 @@
2837
  {
2838
  "name": "model.layers.5.self_attn.o_proj.q_weight",
2839
  "shape": [
2840
- 72,
2841
- 576
2842
  ],
2843
  "dtype": "uint32",
2844
  "format": "f32-to-bf16",
@@ -2848,8 +2848,8 @@
2848
  {
2849
  "name": "model.layers.5.self_attn.o_proj.q_scale",
2850
  "shape": [
2851
- 18,
2852
- 576
2853
  ],
2854
  "dtype": "float16",
2855
  "format": "f32-to-bf16",
@@ -2869,8 +2869,8 @@
2869
  {
2870
  "name": "model.layers.6.mlp.down_proj.q_weight",
2871
  "shape": [
2872
- 192,
2873
- 576
2874
  ],
2875
  "dtype": "uint32",
2876
  "format": "f32-to-bf16",
@@ -2880,8 +2880,8 @@
2880
  {
2881
  "name": "model.layers.6.mlp.down_proj.q_scale",
2882
  "shape": [
2883
- 48,
2884
- 576
2885
  ],
2886
  "dtype": "float16",
2887
  "format": "f32-to-bf16",
@@ -2891,8 +2891,8 @@
2891
  {
2892
  "name": "model.layers.6.mlp.gate_up_proj.q_weight",
2893
  "shape": [
2894
- 72,
2895
- 3072
2896
  ],
2897
  "dtype": "uint32",
2898
  "format": "f32-to-bf16",
@@ -2902,8 +2902,8 @@
2902
  {
2903
  "name": "model.layers.6.mlp.gate_up_proj.q_scale",
2904
  "shape": [
2905
- 18,
2906
- 3072
2907
  ],
2908
  "dtype": "float16",
2909
  "format": "f32-to-bf16",
@@ -2923,8 +2923,8 @@
2923
  {
2924
  "name": "model.layers.6.self_attn.qkv_proj.q_weight",
2925
  "shape": [
2926
- 72,
2927
- 960
2928
  ],
2929
  "dtype": "uint32",
2930
  "format": "f32-to-bf16",
@@ -2934,8 +2934,8 @@
2934
  {
2935
  "name": "model.layers.6.self_attn.qkv_proj.q_scale",
2936
  "shape": [
2937
- 18,
2938
- 960
2939
  ],
2940
  "dtype": "float16",
2941
  "format": "f32-to-bf16",
@@ -2945,8 +2945,8 @@
2945
  {
2946
  "name": "model.layers.6.self_attn.o_proj.q_weight",
2947
  "shape": [
2948
- 72,
2949
- 576
2950
  ],
2951
  "dtype": "uint32",
2952
  "format": "f32-to-bf16",
@@ -2956,8 +2956,8 @@
2956
  {
2957
  "name": "model.layers.6.self_attn.o_proj.q_scale",
2958
  "shape": [
2959
- 18,
2960
- 576
2961
  ],
2962
  "dtype": "float16",
2963
  "format": "f32-to-bf16",
@@ -2977,8 +2977,8 @@
2977
  {
2978
  "name": "model.layers.7.mlp.down_proj.q_weight",
2979
  "shape": [
2980
- 192,
2981
- 576
2982
  ],
2983
  "dtype": "uint32",
2984
  "format": "f32-to-bf16",
@@ -2988,8 +2988,8 @@
2988
  {
2989
  "name": "model.layers.7.mlp.down_proj.q_scale",
2990
  "shape": [
2991
- 48,
2992
- 576
2993
  ],
2994
  "dtype": "float16",
2995
  "format": "f32-to-bf16",
@@ -2999,8 +2999,8 @@
2999
  {
3000
  "name": "model.layers.7.mlp.gate_up_proj.q_weight",
3001
  "shape": [
3002
- 72,
3003
- 3072
3004
  ],
3005
  "dtype": "uint32",
3006
  "format": "f32-to-bf16",
@@ -3010,8 +3010,8 @@
3010
  {
3011
  "name": "model.layers.7.mlp.gate_up_proj.q_scale",
3012
  "shape": [
3013
- 18,
3014
- 3072
3015
  ],
3016
  "dtype": "float16",
3017
  "format": "f32-to-bf16",
@@ -3031,8 +3031,8 @@
3031
  {
3032
  "name": "model.layers.7.self_attn.qkv_proj.q_weight",
3033
  "shape": [
3034
- 72,
3035
- 960
3036
  ],
3037
  "dtype": "uint32",
3038
  "format": "f32-to-bf16",
@@ -3042,8 +3042,8 @@
3042
  {
3043
  "name": "model.layers.7.self_attn.qkv_proj.q_scale",
3044
  "shape": [
3045
- 18,
3046
- 960
3047
  ],
3048
  "dtype": "float16",
3049
  "format": "f32-to-bf16",
@@ -3053,8 +3053,8 @@
3053
  {
3054
  "name": "model.layers.7.self_attn.o_proj.q_weight",
3055
  "shape": [
3056
- 72,
3057
- 576
3058
  ],
3059
  "dtype": "uint32",
3060
  "format": "f32-to-bf16",
@@ -3064,8 +3064,8 @@
3064
  {
3065
  "name": "model.layers.7.self_attn.o_proj.q_scale",
3066
  "shape": [
3067
- 18,
3068
- 576
3069
  ],
3070
  "dtype": "float16",
3071
  "format": "f32-to-bf16",
@@ -3085,8 +3085,8 @@
3085
  {
3086
  "name": "model.layers.8.mlp.down_proj.q_weight",
3087
  "shape": [
3088
- 192,
3089
- 576
3090
  ],
3091
  "dtype": "uint32",
3092
  "format": "f32-to-bf16",
@@ -3096,8 +3096,8 @@
3096
  {
3097
  "name": "model.layers.8.mlp.down_proj.q_scale",
3098
  "shape": [
3099
- 48,
3100
- 576
3101
  ],
3102
  "dtype": "float16",
3103
  "format": "f32-to-bf16",
@@ -3107,8 +3107,8 @@
3107
  {
3108
  "name": "model.layers.8.mlp.gate_up_proj.q_weight",
3109
  "shape": [
3110
- 72,
3111
- 3072
3112
  ],
3113
  "dtype": "uint32",
3114
  "format": "f32-to-bf16",
@@ -3118,8 +3118,8 @@
3118
  {
3119
  "name": "model.layers.8.mlp.gate_up_proj.q_scale",
3120
  "shape": [
3121
- 18,
3122
- 3072
3123
  ],
3124
  "dtype": "float16",
3125
  "format": "f32-to-bf16",
@@ -3139,8 +3139,8 @@
3139
  {
3140
  "name": "model.layers.8.self_attn.qkv_proj.q_weight",
3141
  "shape": [
3142
- 72,
3143
- 960
3144
  ],
3145
  "dtype": "uint32",
3146
  "format": "f32-to-bf16",
@@ -3150,8 +3150,8 @@
3150
  {
3151
  "name": "model.layers.8.self_attn.qkv_proj.q_scale",
3152
  "shape": [
3153
- 18,
3154
- 960
3155
  ],
3156
  "dtype": "float16",
3157
  "format": "f32-to-bf16",
@@ -3161,8 +3161,8 @@
3161
  {
3162
  "name": "model.layers.8.self_attn.o_proj.q_weight",
3163
  "shape": [
3164
- 72,
3165
- 576
3166
  ],
3167
  "dtype": "uint32",
3168
  "format": "f32-to-bf16",
@@ -3172,8 +3172,8 @@
3172
  {
3173
  "name": "model.layers.8.self_attn.o_proj.q_scale",
3174
  "shape": [
3175
- 18,
3176
- 576
3177
  ],
3178
  "dtype": "float16",
3179
  "format": "f32-to-bf16",
@@ -3193,8 +3193,8 @@
3193
  {
3194
  "name": "model.layers.9.mlp.down_proj.q_weight",
3195
  "shape": [
3196
- 192,
3197
- 576
3198
  ],
3199
  "dtype": "uint32",
3200
  "format": "f32-to-bf16",
@@ -3204,8 +3204,8 @@
3204
  {
3205
  "name": "model.layers.9.mlp.down_proj.q_scale",
3206
  "shape": [
3207
- 48,
3208
- 576
3209
  ],
3210
  "dtype": "float16",
3211
  "format": "f32-to-bf16",
@@ -3215,8 +3215,8 @@
3215
  {
3216
  "name": "model.layers.9.mlp.gate_up_proj.q_weight",
3217
  "shape": [
3218
- 72,
3219
- 3072
3220
  ],
3221
  "dtype": "uint32",
3222
  "format": "f32-to-bf16",
@@ -3226,8 +3226,8 @@
3226
  {
3227
  "name": "model.layers.9.mlp.gate_up_proj.q_scale",
3228
  "shape": [
3229
- 18,
3230
- 3072
3231
  ],
3232
  "dtype": "float16",
3233
  "format": "f32-to-bf16",
@@ -3247,8 +3247,8 @@
3247
  {
3248
  "name": "model.layers.9.self_attn.qkv_proj.q_weight",
3249
  "shape": [
3250
- 72,
3251
- 960
3252
  ],
3253
  "dtype": "uint32",
3254
  "format": "f32-to-bf16",
@@ -3258,8 +3258,8 @@
3258
  {
3259
  "name": "model.layers.9.self_attn.qkv_proj.q_scale",
3260
  "shape": [
3261
- 18,
3262
- 960
3263
  ],
3264
  "dtype": "float16",
3265
  "format": "f32-to-bf16",
@@ -3269,8 +3269,8 @@
3269
  {
3270
  "name": "model.layers.9.self_attn.o_proj.q_weight",
3271
  "shape": [
3272
- 72,
3273
- 576
3274
  ],
3275
  "dtype": "uint32",
3276
  "format": "f32-to-bf16",
@@ -3280,8 +3280,8 @@
3280
  {
3281
  "name": "model.layers.9.self_attn.o_proj.q_scale",
3282
  "shape": [
3283
- 18,
3284
- 576
3285
  ],
3286
  "dtype": "float16",
3287
  "format": "f32-to-bf16",
@@ -3299,7 +3299,7 @@
3299
  "byteOffset": 9465984
3300
  }
3301
  ],
3302
- "md5sum": "8e0bffe43e07eeeaa46bca85eaf999e3"
3303
  }
3304
  ]
3305
  }
 
45
  {
46
  "name": "model.layers.0.mlp.down_proj.q_weight",
47
  "shape": [
48
+ 576,
49
+ 192
50
  ],
51
  "dtype": "uint32",
52
  "format": "f32-to-bf16",
 
56
  {
57
  "name": "model.layers.0.mlp.down_proj.q_scale",
58
  "shape": [
59
+ 576,
60
+ 48
61
  ],
62
  "dtype": "float16",
63
  "format": "f32-to-bf16",
 
67
  {
68
  "name": "model.layers.0.mlp.gate_up_proj.q_weight",
69
  "shape": [
70
+ 3072,
71
+ 72
72
  ],
73
  "dtype": "uint32",
74
  "format": "f32-to-bf16",
 
78
  {
79
  "name": "model.layers.0.mlp.gate_up_proj.q_scale",
80
  "shape": [
81
+ 3072,
82
+ 18
83
  ],
84
  "dtype": "float16",
85
  "format": "f32-to-bf16",
 
99
  {
100
  "name": "model.layers.0.self_attn.qkv_proj.q_weight",
101
  "shape": [
102
+ 960,
103
+ 72
104
  ],
105
  "dtype": "uint32",
106
  "format": "f32-to-bf16",
 
110
  {
111
  "name": "model.layers.0.self_attn.qkv_proj.q_scale",
112
  "shape": [
113
+ 960,
114
+ 18
115
  ],
116
  "dtype": "float16",
117
  "format": "f32-to-bf16",
 
121
  {
122
  "name": "model.layers.0.self_attn.o_proj.q_weight",
123
  "shape": [
124
+ 576,
125
+ 72
126
  ],
127
  "dtype": "uint32",
128
  "format": "f32-to-bf16",
 
132
  {
133
  "name": "model.layers.0.self_attn.o_proj.q_scale",
134
  "shape": [
135
+ 576,
136
+ 18
137
  ],
138
  "dtype": "float16",
139
  "format": "f32-to-bf16",
 
153
  {
154
  "name": "model.layers.1.mlp.down_proj.q_weight",
155
  "shape": [
156
+ 576,
157
+ 192
158
  ],
159
  "dtype": "uint32",
160
  "format": "f32-to-bf16",
 
164
  {
165
  "name": "model.layers.1.mlp.down_proj.q_scale",
166
  "shape": [
167
+ 576,
168
+ 48
169
  ],
170
  "dtype": "float16",
171
  "format": "f32-to-bf16",
 
175
  {
176
  "name": "model.layers.1.mlp.gate_up_proj.q_weight",
177
  "shape": [
178
+ 3072,
179
+ 72
180
  ],
181
  "dtype": "uint32",
182
  "format": "f32-to-bf16",
 
186
  {
187
  "name": "model.layers.1.mlp.gate_up_proj.q_scale",
188
  "shape": [
189
+ 3072,
190
+ 18
191
  ],
192
  "dtype": "float16",
193
  "format": "f32-to-bf16",
 
207
  {
208
  "name": "model.layers.1.self_attn.qkv_proj.q_weight",
209
  "shape": [
210
+ 960,
211
+ 72
212
  ],
213
  "dtype": "uint32",
214
  "format": "f32-to-bf16",
 
218
  {
219
  "name": "model.layers.1.self_attn.qkv_proj.q_scale",
220
  "shape": [
221
+ 960,
222
+ 18
223
  ],
224
  "dtype": "float16",
225
  "format": "f32-to-bf16",
 
229
  {
230
  "name": "model.layers.1.self_attn.o_proj.q_weight",
231
  "shape": [
232
+ 576,
233
+ 72
234
  ],
235
  "dtype": "uint32",
236
  "format": "f32-to-bf16",
 
240
  {
241
  "name": "model.layers.1.self_attn.o_proj.q_scale",
242
  "shape": [
243
+ 576,
244
+ 18
245
  ],
246
  "dtype": "float16",
247
  "format": "f32-to-bf16",
 
261
  {
262
  "name": "model.layers.10.mlp.down_proj.q_weight",
263
  "shape": [
264
+ 576,
265
+ 192
266
  ],
267
  "dtype": "uint32",
268
  "format": "f32-to-bf16",
 
272
  {
273
  "name": "model.layers.10.mlp.down_proj.q_scale",
274
  "shape": [
275
+ 576,
276
+ 48
277
  ],
278
  "dtype": "float16",
279
  "format": "f32-to-bf16",
 
283
  {
284
  "name": "model.layers.10.mlp.gate_up_proj.q_weight",
285
  "shape": [
286
+ 3072,
287
+ 72
288
  ],
289
  "dtype": "uint32",
290
  "format": "f32-to-bf16",
 
294
  {
295
  "name": "model.layers.10.mlp.gate_up_proj.q_scale",
296
  "shape": [
297
+ 3072,
298
+ 18
299
  ],
300
  "dtype": "float16",
301
  "format": "f32-to-bf16",
 
315
  {
316
  "name": "model.layers.10.self_attn.qkv_proj.q_weight",
317
  "shape": [
318
+ 960,
319
+ 72
320
  ],
321
  "dtype": "uint32",
322
  "format": "f32-to-bf16",
 
326
  {
327
  "name": "model.layers.10.self_attn.qkv_proj.q_scale",
328
  "shape": [
329
+ 960,
330
+ 18
331
  ],
332
  "dtype": "float16",
333
  "format": "f32-to-bf16",
 
337
  {
338
  "name": "model.layers.10.self_attn.o_proj.q_weight",
339
  "shape": [
340
+ 576,
341
+ 72
342
  ],
343
  "dtype": "uint32",
344
  "format": "f32-to-bf16",
 
348
  {
349
  "name": "model.layers.10.self_attn.o_proj.q_scale",
350
  "shape": [
351
+ 576,
352
+ 18
353
  ],
354
  "dtype": "float16",
355
  "format": "f32-to-bf16",
 
369
  {
370
  "name": "model.layers.11.mlp.down_proj.q_weight",
371
  "shape": [
372
+ 576,
373
+ 192
374
  ],
375
  "dtype": "uint32",
376
  "format": "f32-to-bf16",
 
380
  {
381
  "name": "model.layers.11.mlp.down_proj.q_scale",
382
  "shape": [
383
+ 576,
384
+ 48
385
  ],
386
  "dtype": "float16",
387
  "format": "f32-to-bf16",
 
391
  {
392
  "name": "model.layers.11.mlp.gate_up_proj.q_weight",
393
  "shape": [
394
+ 3072,
395
+ 72
396
  ],
397
  "dtype": "uint32",
398
  "format": "f32-to-bf16",
 
402
  {
403
  "name": "model.layers.11.mlp.gate_up_proj.q_scale",
404
  "shape": [
405
+ 3072,
406
+ 18
407
  ],
408
  "dtype": "float16",
409
  "format": "f32-to-bf16",
 
423
  {
424
  "name": "model.layers.11.self_attn.qkv_proj.q_weight",
425
  "shape": [
426
+ 960,
427
+ 72
428
  ],
429
  "dtype": "uint32",
430
  "format": "f32-to-bf16",
 
434
  {
435
  "name": "model.layers.11.self_attn.qkv_proj.q_scale",
436
  "shape": [
437
+ 960,
438
+ 18
439
  ],
440
  "dtype": "float16",
441
  "format": "f32-to-bf16",
 
445
  {
446
  "name": "model.layers.11.self_attn.o_proj.q_weight",
447
  "shape": [
448
+ 576,
449
+ 72
450
  ],
451
  "dtype": "uint32",
452
  "format": "f32-to-bf16",
 
456
  {
457
  "name": "model.layers.11.self_attn.o_proj.q_scale",
458
  "shape": [
459
+ 576,
460
+ 18
461
  ],
462
  "dtype": "float16",
463
  "format": "f32-to-bf16",
 
477
  {
478
  "name": "model.layers.12.mlp.down_proj.q_weight",
479
  "shape": [
480
+ 576,
481
+ 192
482
  ],
483
  "dtype": "uint32",
484
  "format": "f32-to-bf16",
 
488
  {
489
  "name": "model.layers.12.mlp.down_proj.q_scale",
490
  "shape": [
491
+ 576,
492
+ 48
493
  ],
494
  "dtype": "float16",
495
  "format": "f32-to-bf16",
 
499
  {
500
  "name": "model.layers.12.mlp.gate_up_proj.q_weight",
501
  "shape": [
502
+ 3072,
503
+ 72
504
  ],
505
  "dtype": "uint32",
506
  "format": "f32-to-bf16",
 
510
  {
511
  "name": "model.layers.12.mlp.gate_up_proj.q_scale",
512
  "shape": [
513
+ 3072,
514
+ 18
515
  ],
516
  "dtype": "float16",
517
  "format": "f32-to-bf16",
 
531
  {
532
  "name": "model.layers.12.self_attn.qkv_proj.q_weight",
533
  "shape": [
534
+ 960,
535
+ 72
536
  ],
537
  "dtype": "uint32",
538
  "format": "f32-to-bf16",
 
542
  {
543
  "name": "model.layers.12.self_attn.qkv_proj.q_scale",
544
  "shape": [
545
+ 960,
546
+ 18
547
  ],
548
  "dtype": "float16",
549
  "format": "f32-to-bf16",
 
553
  {
554
  "name": "model.layers.12.self_attn.o_proj.q_weight",
555
  "shape": [
556
+ 576,
557
+ 72
558
  ],
559
  "dtype": "uint32",
560
  "format": "f32-to-bf16",
 
564
  {
565
  "name": "model.layers.12.self_attn.o_proj.q_scale",
566
  "shape": [
567
+ 576,
568
+ 18
569
  ],
570
  "dtype": "float16",
571
  "format": "f32-to-bf16",
 
585
  {
586
  "name": "model.layers.13.mlp.down_proj.q_weight",
587
  "shape": [
588
+ 576,
589
+ 192
590
  ],
591
  "dtype": "uint32",
592
  "format": "f32-to-bf16",
 
596
  {
597
  "name": "model.layers.13.mlp.down_proj.q_scale",
598
  "shape": [
599
+ 576,
600
+ 48
601
  ],
602
  "dtype": "float16",
603
  "format": "f32-to-bf16",
 
607
  {
608
  "name": "model.layers.13.mlp.gate_up_proj.q_weight",
609
  "shape": [
610
+ 3072,
611
+ 72
612
  ],
613
  "dtype": "uint32",
614
  "format": "f32-to-bf16",
 
618
  {
619
  "name": "model.layers.13.mlp.gate_up_proj.q_scale",
620
  "shape": [
621
+ 3072,
622
+ 18
623
  ],
624
  "dtype": "float16",
625
  "format": "f32-to-bf16",
 
639
  {
640
  "name": "model.layers.13.self_attn.qkv_proj.q_weight",
641
  "shape": [
642
+ 960,
643
+ 72
644
  ],
645
  "dtype": "uint32",
646
  "format": "f32-to-bf16",
 
650
  {
651
  "name": "model.layers.13.self_attn.qkv_proj.q_scale",
652
  "shape": [
653
+ 960,
654
+ 18
655
  ],
656
  "dtype": "float16",
657
  "format": "f32-to-bf16",
 
661
  {
662
  "name": "model.layers.13.self_attn.o_proj.q_weight",
663
  "shape": [
664
+ 576,
665
+ 72
666
  ],
667
  "dtype": "uint32",
668
  "format": "f32-to-bf16",
 
672
  {
673
  "name": "model.layers.13.self_attn.o_proj.q_scale",
674
  "shape": [
675
+ 576,
676
+ 18
677
  ],
678
  "dtype": "float16",
679
  "format": "f32-to-bf16",
 
693
  {
694
  "name": "model.layers.14.mlp.down_proj.q_weight",
695
  "shape": [
696
+ 576,
697
+ 192
698
  ],
699
  "dtype": "uint32",
700
  "format": "f32-to-bf16",
 
704
  {
705
  "name": "model.layers.14.mlp.down_proj.q_scale",
706
  "shape": [
707
+ 576,
708
+ 48
709
  ],
710
  "dtype": "float16",
711
  "format": "f32-to-bf16",
 
715
  {
716
  "name": "model.layers.14.mlp.gate_up_proj.q_weight",
717
  "shape": [
718
+ 3072,
719
+ 72
720
  ],
721
  "dtype": "uint32",
722
  "format": "f32-to-bf16",
 
726
  {
727
  "name": "model.layers.14.mlp.gate_up_proj.q_scale",
728
  "shape": [
729
+ 3072,
730
+ 18
731
  ],
732
  "dtype": "float16",
733
  "format": "f32-to-bf16",
 
747
  {
748
  "name": "model.layers.14.self_attn.qkv_proj.q_weight",
749
  "shape": [
750
+ 960,
751
+ 72
752
  ],
753
  "dtype": "uint32",
754
  "format": "f32-to-bf16",
 
758
  {
759
  "name": "model.layers.14.self_attn.qkv_proj.q_scale",
760
  "shape": [
761
+ 960,
762
+ 18
763
  ],
764
  "dtype": "float16",
765
  "format": "f32-to-bf16",
 
769
  {
770
  "name": "model.layers.14.self_attn.o_proj.q_weight",
771
  "shape": [
772
+ 576,
773
+ 72
774
  ],
775
  "dtype": "uint32",
776
  "format": "f32-to-bf16",
 
780
  {
781
  "name": "model.layers.14.self_attn.o_proj.q_scale",
782
  "shape": [
783
+ 576,
784
+ 18
785
  ],
786
  "dtype": "float16",
787
  "format": "f32-to-bf16",
 
801
  {
802
  "name": "model.layers.15.mlp.down_proj.q_weight",
803
  "shape": [
804
+ 576,
805
+ 192
806
  ],
807
  "dtype": "uint32",
808
  "format": "f32-to-bf16",
 
812
  {
813
  "name": "model.layers.15.mlp.down_proj.q_scale",
814
  "shape": [
815
+ 576,
816
+ 48
817
  ],
818
  "dtype": "float16",
819
  "format": "f32-to-bf16",
 
823
  {
824
  "name": "model.layers.15.mlp.gate_up_proj.q_weight",
825
  "shape": [
826
+ 3072,
827
+ 72
828
  ],
829
  "dtype": "uint32",
830
  "format": "f32-to-bf16",
 
834
  {
835
  "name": "model.layers.15.mlp.gate_up_proj.q_scale",
836
  "shape": [
837
+ 3072,
838
+ 18
839
  ],
840
  "dtype": "float16",
841
  "format": "f32-to-bf16",
 
855
  {
856
  "name": "model.layers.15.self_attn.qkv_proj.q_weight",
857
  "shape": [
858
+ 960,
859
+ 72
860
  ],
861
  "dtype": "uint32",
862
  "format": "f32-to-bf16",
 
866
  {
867
  "name": "model.layers.15.self_attn.qkv_proj.q_scale",
868
  "shape": [
869
+ 960,
870
+ 18
871
  ],
872
  "dtype": "float16",
873
  "format": "f32-to-bf16",
 
877
  {
878
  "name": "model.layers.15.self_attn.o_proj.q_weight",
879
  "shape": [
880
+ 576,
881
+ 72
882
  ],
883
  "dtype": "uint32",
884
  "format": "f32-to-bf16",
 
888
  {
889
  "name": "model.layers.15.self_attn.o_proj.q_scale",
890
  "shape": [
891
+ 576,
892
+ 18
893
  ],
894
  "dtype": "float16",
895
  "format": "f32-to-bf16",
 
909
  {
910
  "name": "model.layers.16.mlp.down_proj.q_weight",
911
  "shape": [
912
+ 576,
913
+ 192
914
  ],
915
  "dtype": "uint32",
916
  "format": "f32-to-bf16",
 
920
  {
921
  "name": "model.layers.16.mlp.down_proj.q_scale",
922
  "shape": [
923
+ 576,
924
+ 48
925
  ],
926
  "dtype": "float16",
927
  "format": "f32-to-bf16",
 
931
  {
932
  "name": "model.layers.16.mlp.gate_up_proj.q_weight",
933
  "shape": [
934
+ 3072,
935
+ 72
936
  ],
937
  "dtype": "uint32",
938
  "format": "f32-to-bf16",
 
942
  {
943
  "name": "model.layers.16.mlp.gate_up_proj.q_scale",
944
  "shape": [
945
+ 3072,
946
+ 18
947
  ],
948
  "dtype": "float16",
949
  "format": "f32-to-bf16",
 
961
  "byteOffset": 33363396
962
  }
963
  ],
964
+ "md5sum": "28cb4b478cefb107df66c43c177153b6"
965
  },
966
  {
967
  "dataPath": "params_shard_1.bin",
 
971
  {
972
  "name": "model.layers.16.self_attn.qkv_proj.q_weight",
973
  "shape": [
974
+ 960,
975
+ 72
976
  ],
977
  "dtype": "uint32",
978
  "format": "f32-to-bf16",
 
982
  {
983
  "name": "model.layers.16.self_attn.qkv_proj.q_scale",
984
  "shape": [
985
+ 960,
986
+ 18
987
  ],
988
  "dtype": "float16",
989
  "format": "f32-to-bf16",
 
993
  {
994
  "name": "model.layers.16.self_attn.o_proj.q_weight",
995
  "shape": [
996
+ 576,
997
+ 72
998
  ],
999
  "dtype": "uint32",
1000
  "format": "f32-to-bf16",
 
1004
  {
1005
  "name": "model.layers.16.self_attn.o_proj.q_scale",
1006
  "shape": [
1007
+ 576,
1008
+ 18
1009
  ],
1010
  "dtype": "float16",
1011
  "format": "f32-to-bf16",
 
1025
  {
1026
  "name": "model.layers.17.mlp.down_proj.q_weight",
1027
  "shape": [
1028
+ 576,
1029
+ 192
1030
  ],
1031
  "dtype": "uint32",
1032
  "format": "f32-to-bf16",
 
1036
  {
1037
  "name": "model.layers.17.mlp.down_proj.q_scale",
1038
  "shape": [
1039
+ 576,
1040
+ 48
1041
  ],
1042
  "dtype": "float16",
1043
  "format": "f32-to-bf16",
 
1047
  {
1048
  "name": "model.layers.17.mlp.gate_up_proj.q_weight",
1049
  "shape": [
1050
+ 3072,
1051
+ 72
1052
  ],
1053
  "dtype": "uint32",
1054
  "format": "f32-to-bf16",
 
1058
  {
1059
  "name": "model.layers.17.mlp.gate_up_proj.q_scale",
1060
  "shape": [
1061
+ 3072,
1062
+ 18
1063
  ],
1064
  "dtype": "float16",
1065
  "format": "f32-to-bf16",
 
1079
  {
1080
  "name": "model.layers.17.self_attn.qkv_proj.q_weight",
1081
  "shape": [
1082
+ 960,
1083
+ 72
1084
  ],
1085
  "dtype": "uint32",
1086
  "format": "f32-to-bf16",
 
1090
  {
1091
  "name": "model.layers.17.self_attn.qkv_proj.q_scale",
1092
  "shape": [
1093
+ 960,
1094
+ 18
1095
  ],
1096
  "dtype": "float16",
1097
  "format": "f32-to-bf16",
 
1101
  {
1102
  "name": "model.layers.17.self_attn.o_proj.q_weight",
1103
  "shape": [
1104
+ 576,
1105
+ 72
1106
  ],
1107
  "dtype": "uint32",
1108
  "format": "f32-to-bf16",
 
1112
  {
1113
  "name": "model.layers.17.self_attn.o_proj.q_scale",
1114
  "shape": [
1115
+ 576,
1116
+ 18
1117
  ],
1118
  "dtype": "float16",
1119
  "format": "f32-to-bf16",
 
1133
  {
1134
  "name": "model.layers.18.mlp.down_proj.q_weight",
1135
  "shape": [
1136
+ 576,
1137
+ 192
1138
  ],
1139
  "dtype": "uint32",
1140
  "format": "f32-to-bf16",
 
1144
  {
1145
  "name": "model.layers.18.mlp.down_proj.q_scale",
1146
  "shape": [
1147
+ 576,
1148
+ 48
1149
  ],
1150
  "dtype": "float16",
1151
  "format": "f32-to-bf16",
 
1155
  {
1156
  "name": "model.layers.18.mlp.gate_up_proj.q_weight",
1157
  "shape": [
1158
+ 3072,
1159
+ 72
1160
  ],
1161
  "dtype": "uint32",
1162
  "format": "f32-to-bf16",
 
1166
  {
1167
  "name": "model.layers.18.mlp.gate_up_proj.q_scale",
1168
  "shape": [
1169
+ 3072,
1170
+ 18
1171
  ],
1172
  "dtype": "float16",
1173
  "format": "f32-to-bf16",
 
1187
  {
1188
  "name": "model.layers.18.self_attn.qkv_proj.q_weight",
1189
  "shape": [
1190
+ 960,
1191
+ 72
1192
  ],
1193
  "dtype": "uint32",
1194
  "format": "f32-to-bf16",
 
1198
  {
1199
  "name": "model.layers.18.self_attn.qkv_proj.q_scale",
1200
  "shape": [
1201
+ 960,
1202
+ 18
1203
  ],
1204
  "dtype": "float16",
1205
  "format": "f32-to-bf16",
 
1209
  {
1210
  "name": "model.layers.18.self_attn.o_proj.q_weight",
1211
  "shape": [
1212
+ 576,
1213
+ 72
1214
  ],
1215
  "dtype": "uint32",
1216
  "format": "f32-to-bf16",
 
1220
  {
1221
  "name": "model.layers.18.self_attn.o_proj.q_scale",
1222
  "shape": [
1223
+ 576,
1224
+ 18
1225
  ],
1226
  "dtype": "float16",
1227
  "format": "f32-to-bf16",
 
1241
  {
1242
  "name": "model.layers.19.mlp.down_proj.q_weight",
1243
  "shape": [
1244
+ 576,
1245
+ 192
1246
  ],
1247
  "dtype": "uint32",
1248
  "format": "f32-to-bf16",
 
1252
  {
1253
  "name": "model.layers.19.mlp.down_proj.q_scale",
1254
  "shape": [
1255
+ 576,
1256
+ 48
1257
  ],
1258
  "dtype": "float16",
1259
  "format": "f32-to-bf16",
 
1263
  {
1264
  "name": "model.layers.19.mlp.gate_up_proj.q_weight",
1265
  "shape": [
1266
+ 3072,
1267
+ 72
1268
  ],
1269
  "dtype": "uint32",
1270
  "format": "f32-to-bf16",
 
1274
  {
1275
  "name": "model.layers.19.mlp.gate_up_proj.q_scale",
1276
  "shape": [
1277
+ 3072,
1278
+ 18
1279
  ],
1280
  "dtype": "float16",
1281
  "format": "f32-to-bf16",
 
1295
  {
1296
  "name": "model.layers.19.self_attn.qkv_proj.q_weight",
1297
  "shape": [
1298
+ 960,
1299
+ 72
1300
  ],
1301
  "dtype": "uint32",
1302
  "format": "f32-to-bf16",
 
1306
  {
1307
  "name": "model.layers.19.self_attn.qkv_proj.q_scale",
1308
  "shape": [
1309
+ 960,
1310
+ 18
1311
  ],
1312
  "dtype": "float16",
1313
  "format": "f32-to-bf16",
 
1317
  {
1318
  "name": "model.layers.19.self_attn.o_proj.q_weight",
1319
  "shape": [
1320
+ 576,
1321
+ 72
1322
  ],
1323
  "dtype": "uint32",
1324
  "format": "f32-to-bf16",
 
1328
  {
1329
  "name": "model.layers.19.self_attn.o_proj.q_scale",
1330
  "shape": [
1331
+ 576,
1332
+ 18
1333
  ],
1334
  "dtype": "float16",
1335
  "format": "f32-to-bf16",
 
1349
  {
1350
  "name": "model.layers.2.mlp.down_proj.q_weight",
1351
  "shape": [
1352
+ 576,
1353
+ 192
1354
  ],
1355
  "dtype": "uint32",
1356
  "format": "f32-to-bf16",
 
1360
  {
1361
  "name": "model.layers.2.mlp.down_proj.q_scale",
1362
  "shape": [
1363
+ 576,
1364
+ 48
1365
  ],
1366
  "dtype": "float16",
1367
  "format": "f32-to-bf16",
 
1371
  {
1372
  "name": "model.layers.2.mlp.gate_up_proj.q_weight",
1373
  "shape": [
1374
+ 3072,
1375
+ 72
1376
  ],
1377
  "dtype": "uint32",
1378
  "format": "f32-to-bf16",
 
1382
  {
1383
  "name": "model.layers.2.mlp.gate_up_proj.q_scale",
1384
  "shape": [
1385
+ 3072,
1386
+ 18
1387
  ],
1388
  "dtype": "float16",
1389
  "format": "f32-to-bf16",
 
1403
  {
1404
  "name": "model.layers.2.self_attn.qkv_proj.q_weight",
1405
  "shape": [
1406
+ 960,
1407
+ 72
1408
  ],
1409
  "dtype": "uint32",
1410
  "format": "f32-to-bf16",
 
1414
  {
1415
  "name": "model.layers.2.self_attn.qkv_proj.q_scale",
1416
  "shape": [
1417
+ 960,
1418
+ 18
1419
  ],
1420
  "dtype": "float16",
1421
  "format": "f32-to-bf16",
 
1425
  {
1426
  "name": "model.layers.2.self_attn.o_proj.q_weight",
1427
  "shape": [
1428
+ 576,
1429
+ 72
1430
  ],
1431
  "dtype": "uint32",
1432
  "format": "f32-to-bf16",
 
1436
  {
1437
  "name": "model.layers.2.self_attn.o_proj.q_scale",
1438
  "shape": [
1439
+ 576,
1440
+ 18
1441
  ],
1442
  "dtype": "float16",
1443
  "format": "f32-to-bf16",
 
1457
  {
1458
  "name": "model.layers.20.mlp.down_proj.q_weight",
1459
  "shape": [
1460
+ 576,
1461
+ 192
1462
  ],
1463
  "dtype": "uint32",
1464
  "format": "f32-to-bf16",
 
1468
  {
1469
  "name": "model.layers.20.mlp.down_proj.q_scale",
1470
  "shape": [
1471
+ 576,
1472
+ 48
1473
  ],
1474
  "dtype": "float16",
1475
  "format": "f32-to-bf16",
 
1479
  {
1480
  "name": "model.layers.20.mlp.gate_up_proj.q_weight",
1481
  "shape": [
1482
+ 3072,
1483
+ 72
1484
  ],
1485
  "dtype": "uint32",
1486
  "format": "f32-to-bf16",
 
1490
  {
1491
  "name": "model.layers.20.mlp.gate_up_proj.q_scale",
1492
  "shape": [
1493
+ 3072,
1494
+ 18
1495
  ],
1496
  "dtype": "float16",
1497
  "format": "f32-to-bf16",
 
1511
  {
1512
  "name": "model.layers.20.self_attn.qkv_proj.q_weight",
1513
  "shape": [
1514
+ 960,
1515
+ 72
1516
  ],
1517
  "dtype": "uint32",
1518
  "format": "f32-to-bf16",
 
1522
  {
1523
  "name": "model.layers.20.self_attn.qkv_proj.q_scale",
1524
  "shape": [
1525
+ 960,
1526
+ 18
1527
  ],
1528
  "dtype": "float16",
1529
  "format": "f32-to-bf16",
 
1533
  {
1534
  "name": "model.layers.20.self_attn.o_proj.q_weight",
1535
  "shape": [
1536
+ 576,
1537
+ 72
1538
  ],
1539
  "dtype": "uint32",
1540
  "format": "f32-to-bf16",
 
1544
  {
1545
  "name": "model.layers.20.self_attn.o_proj.q_scale",
1546
  "shape": [
1547
+ 576,
1548
+ 18
1549
  ],
1550
  "dtype": "float16",
1551
  "format": "f32-to-bf16",
 
1565
  {
1566
  "name": "model.layers.21.mlp.down_proj.q_weight",
1567
  "shape": [
1568
+ 576,
1569
+ 192
1570
  ],
1571
  "dtype": "uint32",
1572
  "format": "f32-to-bf16",
 
1576
  {
1577
  "name": "model.layers.21.mlp.down_proj.q_scale",
1578
  "shape": [
1579
+ 576,
1580
+ 48
1581
  ],
1582
  "dtype": "float16",
1583
  "format": "f32-to-bf16",
 
1587
  {
1588
  "name": "model.layers.21.mlp.gate_up_proj.q_weight",
1589
  "shape": [
1590
+ 3072,
1591
+ 72
1592
  ],
1593
  "dtype": "uint32",
1594
  "format": "f32-to-bf16",
 
1598
  {
1599
  "name": "model.layers.21.mlp.gate_up_proj.q_scale",
1600
  "shape": [
1601
+ 3072,
1602
+ 18
1603
  ],
1604
  "dtype": "float16",
1605
  "format": "f32-to-bf16",
 
1619
  {
1620
  "name": "model.layers.21.self_attn.qkv_proj.q_weight",
1621
  "shape": [
1622
+ 960,
1623
+ 72
1624
  ],
1625
  "dtype": "uint32",
1626
  "format": "f32-to-bf16",
 
1630
  {
1631
  "name": "model.layers.21.self_attn.qkv_proj.q_scale",
1632
  "shape": [
1633
+ 960,
1634
+ 18
1635
  ],
1636
  "dtype": "float16",
1637
  "format": "f32-to-bf16",
 
1641
  {
1642
  "name": "model.layers.21.self_attn.o_proj.q_weight",
1643
  "shape": [
1644
+ 576,
1645
+ 72
1646
  ],
1647
  "dtype": "uint32",
1648
  "format": "f32-to-bf16",
 
1652
  {
1653
  "name": "model.layers.21.self_attn.o_proj.q_scale",
1654
  "shape": [
1655
+ 576,
1656
+ 18
1657
  ],
1658
  "dtype": "float16",
1659
  "format": "f32-to-bf16",
 
1673
  {
1674
  "name": "model.layers.22.mlp.down_proj.q_weight",
1675
  "shape": [
1676
+ 576,
1677
+ 192
1678
  ],
1679
  "dtype": "uint32",
1680
  "format": "f32-to-bf16",
 
1684
  {
1685
  "name": "model.layers.22.mlp.down_proj.q_scale",
1686
  "shape": [
1687
+ 576,
1688
+ 48
1689
  ],
1690
  "dtype": "float16",
1691
  "format": "f32-to-bf16",
 
1695
  {
1696
  "name": "model.layers.22.mlp.gate_up_proj.q_weight",
1697
  "shape": [
1698
+ 3072,
1699
+ 72
1700
  ],
1701
  "dtype": "uint32",
1702
  "format": "f32-to-bf16",
 
1706
  {
1707
  "name": "model.layers.22.mlp.gate_up_proj.q_scale",
1708
  "shape": [
1709
+ 3072,
1710
+ 18
1711
  ],
1712
  "dtype": "float16",
1713
  "format": "f32-to-bf16",
 
1727
  {
1728
  "name": "model.layers.22.self_attn.qkv_proj.q_weight",
1729
  "shape": [
1730
+ 960,
1731
+ 72
1732
  ],
1733
  "dtype": "uint32",
1734
  "format": "f32-to-bf16",
 
1738
  {
1739
  "name": "model.layers.22.self_attn.qkv_proj.q_scale",
1740
  "shape": [
1741
+ 960,
1742
+ 18
1743
  ],
1744
  "dtype": "float16",
1745
  "format": "f32-to-bf16",
 
1749
  {
1750
  "name": "model.layers.22.self_attn.o_proj.q_weight",
1751
  "shape": [
1752
+ 576,
1753
+ 72
1754
  ],
1755
  "dtype": "uint32",
1756
  "format": "f32-to-bf16",
 
1760
  {
1761
  "name": "model.layers.22.self_attn.o_proj.q_scale",
1762
  "shape": [
1763
+ 576,
1764
+ 18
1765
  ],
1766
  "dtype": "float16",
1767
  "format": "f32-to-bf16",
 
1781
  {
1782
  "name": "model.layers.23.mlp.down_proj.q_weight",
1783
  "shape": [
1784
+ 576,
1785
+ 192
1786
  ],
1787
  "dtype": "uint32",
1788
  "format": "f32-to-bf16",
 
1792
  {
1793
  "name": "model.layers.23.mlp.down_proj.q_scale",
1794
  "shape": [
1795
+ 576,
1796
+ 48
1797
  ],
1798
  "dtype": "float16",
1799
  "format": "f32-to-bf16",
 
1803
  {
1804
  "name": "model.layers.23.mlp.gate_up_proj.q_weight",
1805
  "shape": [
1806
+ 3072,
1807
+ 72
1808
  ],
1809
  "dtype": "uint32",
1810
  "format": "f32-to-bf16",
 
1814
  {
1815
  "name": "model.layers.23.mlp.gate_up_proj.q_scale",
1816
  "shape": [
1817
+ 3072,
1818
+ 18
1819
  ],
1820
  "dtype": "float16",
1821
  "format": "f32-to-bf16",
 
1835
  {
1836
  "name": "model.layers.23.self_attn.qkv_proj.q_weight",
1837
  "shape": [
1838
+ 960,
1839
+ 72
1840
  ],
1841
  "dtype": "uint32",
1842
  "format": "f32-to-bf16",
 
1846
  {
1847
  "name": "model.layers.23.self_attn.qkv_proj.q_scale",
1848
  "shape": [
1849
+ 960,
1850
+ 18
1851
  ],
1852
  "dtype": "float16",
1853
  "format": "f32-to-bf16",
 
1857
  {
1858
  "name": "model.layers.23.self_attn.o_proj.q_weight",
1859
  "shape": [
1860
+ 576,
1861
+ 72
1862
  ],
1863
  "dtype": "uint32",
1864
  "format": "f32-to-bf16",
 
1868
  {
1869
  "name": "model.layers.23.self_attn.o_proj.q_scale",
1870
  "shape": [
1871
+ 576,
1872
+ 18
1873
  ],
1874
  "dtype": "float16",
1875
  "format": "f32-to-bf16",
 
1889
  {
1890
  "name": "model.layers.24.mlp.down_proj.q_weight",
1891
  "shape": [
1892
+ 576,
1893
+ 192
1894
  ],
1895
  "dtype": "uint32",
1896
  "format": "f32-to-bf16",
 
1900
  {
1901
  "name": "model.layers.24.mlp.down_proj.q_scale",
1902
  "shape": [
1903
+ 576,
1904
+ 48
1905
  ],
1906
  "dtype": "float16",
1907
  "format": "f32-to-bf16",
 
1911
  {
1912
  "name": "model.layers.24.mlp.gate_up_proj.q_weight",
1913
  "shape": [
1914
+ 3072,
1915
+ 72
1916
  ],
1917
  "dtype": "uint32",
1918
  "format": "f32-to-bf16",
 
1922
  {
1923
  "name": "model.layers.24.mlp.gate_up_proj.q_scale",
1924
  "shape": [
1925
+ 3072,
1926
+ 18
1927
  ],
1928
  "dtype": "float16",
1929
  "format": "f32-to-bf16",
 
1943
  {
1944
  "name": "model.layers.24.self_attn.qkv_proj.q_weight",
1945
  "shape": [
1946
+ 960,
1947
+ 72
1948
  ],
1949
  "dtype": "uint32",
1950
  "format": "f32-to-bf16",
 
1954
  {
1955
  "name": "model.layers.24.self_attn.qkv_proj.q_scale",
1956
  "shape": [
1957
+ 960,
1958
+ 18
1959
  ],
1960
  "dtype": "float16",
1961
  "format": "f32-to-bf16",
 
1965
  {
1966
  "name": "model.layers.24.self_attn.o_proj.q_weight",
1967
  "shape": [
1968
+ 576,
1969
+ 72
1970
  ],
1971
  "dtype": "uint32",
1972
  "format": "f32-to-bf16",
 
1976
  {
1977
  "name": "model.layers.24.self_attn.o_proj.q_scale",
1978
  "shape": [
1979
+ 576,
1980
+ 18
1981
  ],
1982
  "dtype": "float16",
1983
  "format": "f32-to-bf16",
 
1997
  {
1998
  "name": "model.layers.25.mlp.down_proj.q_weight",
1999
  "shape": [
2000
+ 576,
2001
+ 192
2002
  ],
2003
  "dtype": "uint32",
2004
  "format": "f32-to-bf16",
 
2008
  {
2009
  "name": "model.layers.25.mlp.down_proj.q_scale",
2010
  "shape": [
2011
+ 576,
2012
+ 48
2013
  ],
2014
  "dtype": "float16",
2015
  "format": "f32-to-bf16",
 
2019
  {
2020
  "name": "model.layers.25.mlp.gate_up_proj.q_weight",
2021
  "shape": [
2022
+ 3072,
2023
+ 72
2024
  ],
2025
  "dtype": "uint32",
2026
  "format": "f32-to-bf16",
 
2030
  {
2031
  "name": "model.layers.25.mlp.gate_up_proj.q_scale",
2032
  "shape": [
2033
+ 3072,
2034
+ 18
2035
  ],
2036
  "dtype": "float16",
2037
  "format": "f32-to-bf16",
 
2051
  {
2052
  "name": "model.layers.25.self_attn.qkv_proj.q_weight",
2053
  "shape": [
2054
+ 960,
2055
+ 72
2056
  ],
2057
  "dtype": "uint32",
2058
  "format": "f32-to-bf16",
 
2062
  {
2063
  "name": "model.layers.25.self_attn.qkv_proj.q_scale",
2064
  "shape": [
2065
+ 960,
2066
+ 18
2067
  ],
2068
  "dtype": "float16",
2069
  "format": "f32-to-bf16",
 
2073
  {
2074
  "name": "model.layers.25.self_attn.o_proj.q_weight",
2075
  "shape": [
2076
+ 576,
2077
+ 72
2078
  ],
2079
  "dtype": "uint32",
2080
  "format": "f32-to-bf16",
 
2084
  {
2085
  "name": "model.layers.25.self_attn.o_proj.q_scale",
2086
  "shape": [
2087
+ 576,
2088
+ 18
2089
  ],
2090
  "dtype": "float16",
2091
  "format": "f32-to-bf16",
 
2105
  {
2106
  "name": "model.layers.26.mlp.down_proj.q_weight",
2107
  "shape": [
2108
+ 576,
2109
+ 192
2110
  ],
2111
  "dtype": "uint32",
2112
  "format": "f32-to-bf16",
 
2116
  {
2117
  "name": "model.layers.26.mlp.down_proj.q_scale",
2118
  "shape": [
2119
+ 576,
2120
+ 48
2121
  ],
2122
  "dtype": "float16",
2123
  "format": "f32-to-bf16",
 
2127
  {
2128
  "name": "model.layers.26.mlp.gate_up_proj.q_weight",
2129
  "shape": [
2130
+ 3072,
2131
+ 72
2132
  ],
2133
  "dtype": "uint32",
2134
  "format": "f32-to-bf16",
 
2138
  {
2139
  "name": "model.layers.26.mlp.gate_up_proj.q_scale",
2140
  "shape": [
2141
+ 3072,
2142
+ 18
2143
  ],
2144
  "dtype": "float16",
2145
  "format": "f32-to-bf16",
 
2159
  {
2160
  "name": "model.layers.26.self_attn.qkv_proj.q_weight",
2161
  "shape": [
2162
+ 960,
2163
+ 72
2164
  ],
2165
  "dtype": "uint32",
2166
  "format": "f32-to-bf16",
 
2170
  {
2171
  "name": "model.layers.26.self_attn.qkv_proj.q_scale",
2172
  "shape": [
2173
+ 960,
2174
+ 18
2175
  ],
2176
  "dtype": "float16",
2177
  "format": "f32-to-bf16",
 
2181
  {
2182
  "name": "model.layers.26.self_attn.o_proj.q_weight",
2183
  "shape": [
2184
+ 576,
2185
+ 72
2186
  ],
2187
  "dtype": "uint32",
2188
  "format": "f32-to-bf16",
 
2192
  {
2193
  "name": "model.layers.26.self_attn.o_proj.q_scale",
2194
  "shape": [
2195
+ 576,
2196
+ 18
2197
  ],
2198
  "dtype": "float16",
2199
  "format": "f32-to-bf16",
 
2213
  {
2214
  "name": "model.layers.27.mlp.down_proj.q_weight",
2215
  "shape": [
2216
+ 576,
2217
+ 192
2218
  ],
2219
  "dtype": "uint32",
2220
  "format": "f32-to-bf16",
 
2224
  {
2225
  "name": "model.layers.27.mlp.down_proj.q_scale",
2226
  "shape": [
2227
+ 576,
2228
+ 48
2229
  ],
2230
  "dtype": "float16",
2231
  "format": "f32-to-bf16",
 
2235
  {
2236
  "name": "model.layers.27.mlp.gate_up_proj.q_weight",
2237
  "shape": [
2238
+ 3072,
2239
+ 72
2240
  ],
2241
  "dtype": "uint32",
2242
  "format": "f32-to-bf16",
 
2246
  {
2247
  "name": "model.layers.27.mlp.gate_up_proj.q_scale",
2248
  "shape": [
2249
+ 3072,
2250
+ 18
2251
  ],
2252
  "dtype": "float16",
2253
  "format": "f32-to-bf16",
 
2267
  {
2268
  "name": "model.layers.27.self_attn.qkv_proj.q_weight",
2269
  "shape": [
2270
+ 960,
2271
+ 72
2272
  ],
2273
  "dtype": "uint32",
2274
  "format": "f32-to-bf16",
 
2278
  {
2279
  "name": "model.layers.27.self_attn.qkv_proj.q_scale",
2280
  "shape": [
2281
+ 960,
2282
+ 18
2283
  ],
2284
  "dtype": "float16",
2285
  "format": "f32-to-bf16",
 
2289
  {
2290
  "name": "model.layers.27.self_attn.o_proj.q_weight",
2291
  "shape": [
2292
+ 576,
2293
+ 72
2294
  ],
2295
  "dtype": "uint32",
2296
  "format": "f32-to-bf16",
 
2300
  {
2301
  "name": "model.layers.27.self_attn.o_proj.q_scale",
2302
  "shape": [
2303
+ 576,
2304
+ 18
2305
  ],
2306
  "dtype": "float16",
2307
  "format": "f32-to-bf16",
 
2321
  {
2322
  "name": "model.layers.28.mlp.down_proj.q_weight",
2323
  "shape": [
2324
+ 576,
2325
+ 192
2326
  ],
2327
  "dtype": "uint32",
2328
  "format": "f32-to-bf16",
 
2332
  {
2333
  "name": "model.layers.28.mlp.down_proj.q_scale",
2334
  "shape": [
2335
+ 576,
2336
+ 48
2337
  ],
2338
  "dtype": "float16",
2339
  "format": "f32-to-bf16",
 
2343
  {
2344
  "name": "model.layers.28.mlp.gate_up_proj.q_weight",
2345
  "shape": [
2346
+ 3072,
2347
+ 72
2348
  ],
2349
  "dtype": "uint32",
2350
  "format": "f32-to-bf16",
 
2354
  {
2355
  "name": "model.layers.28.mlp.gate_up_proj.q_scale",
2356
  "shape": [
2357
+ 3072,
2358
+ 18
2359
  ],
2360
  "dtype": "float16",
2361
  "format": "f32-to-bf16",
 
2375
  {
2376
  "name": "model.layers.28.self_attn.qkv_proj.q_weight",
2377
  "shape": [
2378
+ 960,
2379
+ 72
2380
  ],
2381
  "dtype": "uint32",
2382
  "format": "f32-to-bf16",
 
2386
  {
2387
  "name": "model.layers.28.self_attn.qkv_proj.q_scale",
2388
  "shape": [
2389
+ 960,
2390
+ 18
2391
  ],
2392
  "dtype": "float16",
2393
  "format": "f32-to-bf16",
 
2397
  {
2398
  "name": "model.layers.28.self_attn.o_proj.q_weight",
2399
  "shape": [
2400
+ 576,
2401
+ 72
2402
  ],
2403
  "dtype": "uint32",
2404
  "format": "f32-to-bf16",
 
2408
  {
2409
  "name": "model.layers.28.self_attn.o_proj.q_scale",
2410
  "shape": [
2411
+ 576,
2412
+ 18
2413
  ],
2414
  "dtype": "float16",
2415
  "format": "f32-to-bf16",
 
2429
  {
2430
  "name": "model.layers.29.mlp.down_proj.q_weight",
2431
  "shape": [
2432
+ 576,
2433
+ 192
2434
  ],
2435
  "dtype": "uint32",
2436
  "format": "f32-to-bf16",
 
2440
  {
2441
  "name": "model.layers.29.mlp.down_proj.q_scale",
2442
  "shape": [
2443
+ 576,
2444
+ 48
2445
  ],
2446
  "dtype": "float16",
2447
  "format": "f32-to-bf16",
 
2451
  {
2452
  "name": "model.layers.29.mlp.gate_up_proj.q_weight",
2453
  "shape": [
2454
+ 3072,
2455
+ 72
2456
  ],
2457
  "dtype": "uint32",
2458
  "format": "f32-to-bf16",
 
2462
  {
2463
  "name": "model.layers.29.mlp.gate_up_proj.q_scale",
2464
  "shape": [
2465
+ 3072,
2466
+ 18
2467
  ],
2468
  "dtype": "float16",
2469
  "format": "f32-to-bf16",
 
2483
  {
2484
  "name": "model.layers.29.self_attn.qkv_proj.q_weight",
2485
  "shape": [
2486
+ 960,
2487
+ 72
2488
  ],
2489
  "dtype": "uint32",
2490
  "format": "f32-to-bf16",
 
2494
  {
2495
  "name": "model.layers.29.self_attn.qkv_proj.q_scale",
2496
  "shape": [
2497
+ 960,
2498
+ 18
2499
  ],
2500
  "dtype": "float16",
2501
  "format": "f32-to-bf16",
 
2505
  {
2506
  "name": "model.layers.29.self_attn.o_proj.q_weight",
2507
  "shape": [
2508
+ 576,
2509
+ 72
2510
  ],
2511
  "dtype": "uint32",
2512
  "format": "f32-to-bf16",
 
2516
  {
2517
  "name": "model.layers.29.self_attn.o_proj.q_scale",
2518
  "shape": [
2519
+ 576,
2520
+ 18
2521
  ],
2522
  "dtype": "float16",
2523
  "format": "f32-to-bf16",
 
2537
  {
2538
  "name": "model.layers.3.mlp.down_proj.q_weight",
2539
  "shape": [
2540
+ 576,
2541
+ 192
2542
  ],
2543
  "dtype": "uint32",
2544
  "format": "f32-to-bf16",
 
2548
  {
2549
  "name": "model.layers.3.mlp.down_proj.q_scale",
2550
  "shape": [
2551
+ 576,
2552
+ 48
2553
  ],
2554
  "dtype": "float16",
2555
  "format": "f32-to-bf16",
 
2559
  {
2560
  "name": "model.layers.3.mlp.gate_up_proj.q_weight",
2561
  "shape": [
2562
+ 3072,
2563
+ 72
2564
  ],
2565
  "dtype": "uint32",
2566
  "format": "f32-to-bf16",
 
2570
  {
2571
  "name": "model.layers.3.mlp.gate_up_proj.q_scale",
2572
  "shape": [
2573
+ 3072,
2574
+ 18
2575
  ],
2576
  "dtype": "float16",
2577
  "format": "f32-to-bf16",
 
2591
  {
2592
  "name": "model.layers.3.self_attn.qkv_proj.q_weight",
2593
  "shape": [
2594
+ 960,
2595
+ 72
2596
  ],
2597
  "dtype": "uint32",
2598
  "format": "f32-to-bf16",
 
2602
  {
2603
  "name": "model.layers.3.self_attn.qkv_proj.q_scale",
2604
  "shape": [
2605
+ 960,
2606
+ 18
2607
  ],
2608
  "dtype": "float16",
2609
  "format": "f32-to-bf16",
 
2613
  {
2614
  "name": "model.layers.3.self_attn.o_proj.q_weight",
2615
  "shape": [
2616
+ 576,
2617
+ 72
2618
  ],
2619
  "dtype": "uint32",
2620
  "format": "f32-to-bf16",
 
2624
  {
2625
  "name": "model.layers.3.self_attn.o_proj.q_scale",
2626
  "shape": [
2627
+ 576,
2628
+ 18
2629
  ],
2630
  "dtype": "float16",
2631
  "format": "f32-to-bf16",
 
2645
  {
2646
  "name": "model.layers.4.mlp.down_proj.q_weight",
2647
  "shape": [
2648
+ 576,
2649
+ 192
2650
  ],
2651
  "dtype": "uint32",
2652
  "format": "f32-to-bf16",
 
2656
  {
2657
  "name": "model.layers.4.mlp.down_proj.q_scale",
2658
  "shape": [
2659
+ 576,
2660
+ 48
2661
  ],
2662
  "dtype": "float16",
2663
  "format": "f32-to-bf16",
 
2667
  {
2668
  "name": "model.layers.4.mlp.gate_up_proj.q_weight",
2669
  "shape": [
2670
+ 3072,
2671
+ 72
2672
  ],
2673
  "dtype": "uint32",
2674
  "format": "f32-to-bf16",
 
2678
  {
2679
  "name": "model.layers.4.mlp.gate_up_proj.q_scale",
2680
  "shape": [
2681
+ 3072,
2682
+ 18
2683
  ],
2684
  "dtype": "float16",
2685
  "format": "f32-to-bf16",
 
2699
  {
2700
  "name": "model.layers.4.self_attn.qkv_proj.q_weight",
2701
  "shape": [
2702
+ 960,
2703
+ 72
2704
  ],
2705
  "dtype": "uint32",
2706
  "format": "f32-to-bf16",
 
2710
  {
2711
  "name": "model.layers.4.self_attn.qkv_proj.q_scale",
2712
  "shape": [
2713
+ 960,
2714
+ 18
2715
  ],
2716
  "dtype": "float16",
2717
  "format": "f32-to-bf16",
 
2721
  {
2722
  "name": "model.layers.4.self_attn.o_proj.q_weight",
2723
  "shape": [
2724
+ 576,
2725
+ 72
2726
  ],
2727
  "dtype": "uint32",
2728
  "format": "f32-to-bf16",
 
2732
  {
2733
  "name": "model.layers.4.self_attn.o_proj.q_scale",
2734
  "shape": [
2735
+ 576,
2736
+ 18
2737
  ],
2738
  "dtype": "float16",
2739
  "format": "f32-to-bf16",
 
2753
  {
2754
  "name": "model.layers.5.mlp.down_proj.q_weight",
2755
  "shape": [
2756
+ 576,
2757
+ 192
2758
  ],
2759
  "dtype": "uint32",
2760
  "format": "f32-to-bf16",
 
2764
  {
2765
  "name": "model.layers.5.mlp.down_proj.q_scale",
2766
  "shape": [
2767
+ 576,
2768
+ 48
2769
  ],
2770
  "dtype": "float16",
2771
  "format": "f32-to-bf16",
 
2773
  "byteOffset": 32828544
2774
  }
2775
  ],
2776
+ "md5sum": "b31a66d8b09f833eba9e80bd7b3d8cac"
2777
  },
2778
  {
2779
  "dataPath": "params_shard_2.bin",
 
2783
  {
2784
  "name": "model.layers.5.mlp.gate_up_proj.q_weight",
2785
  "shape": [
2786
+ 3072,
2787
+ 72
2788
  ],
2789
  "dtype": "uint32",
2790
  "format": "f32-to-bf16",
 
2794
  {
2795
  "name": "model.layers.5.mlp.gate_up_proj.q_scale",
2796
  "shape": [
2797
+ 3072,
2798
+ 18
2799
  ],
2800
  "dtype": "float16",
2801
  "format": "f32-to-bf16",
 
2815
  {
2816
  "name": "model.layers.5.self_attn.qkv_proj.q_weight",
2817
  "shape": [
2818
+ 960,
2819
+ 72
2820
  ],
2821
  "dtype": "uint32",
2822
  "format": "f32-to-bf16",
 
2826
  {
2827
  "name": "model.layers.5.self_attn.qkv_proj.q_scale",
2828
  "shape": [
2829
+ 960,
2830
+ 18
2831
  ],
2832
  "dtype": "float16",
2833
  "format": "f32-to-bf16",
 
2837
  {
2838
  "name": "model.layers.5.self_attn.o_proj.q_weight",
2839
  "shape": [
2840
+ 576,
2841
+ 72
2842
  ],
2843
  "dtype": "uint32",
2844
  "format": "f32-to-bf16",
 
2848
  {
2849
  "name": "model.layers.5.self_attn.o_proj.q_scale",
2850
  "shape": [
2851
+ 576,
2852
+ 18
2853
  ],
2854
  "dtype": "float16",
2855
  "format": "f32-to-bf16",
 
2869
  {
2870
  "name": "model.layers.6.mlp.down_proj.q_weight",
2871
  "shape": [
2872
+ 576,
2873
+ 192
2874
  ],
2875
  "dtype": "uint32",
2876
  "format": "f32-to-bf16",
 
2880
  {
2881
  "name": "model.layers.6.mlp.down_proj.q_scale",
2882
  "shape": [
2883
+ 576,
2884
+ 48
2885
  ],
2886
  "dtype": "float16",
2887
  "format": "f32-to-bf16",
 
2891
  {
2892
  "name": "model.layers.6.mlp.gate_up_proj.q_weight",
2893
  "shape": [
2894
+ 3072,
2895
+ 72
2896
  ],
2897
  "dtype": "uint32",
2898
  "format": "f32-to-bf16",
 
2902
  {
2903
  "name": "model.layers.6.mlp.gate_up_proj.q_scale",
2904
  "shape": [
2905
+ 3072,
2906
+ 18
2907
  ],
2908
  "dtype": "float16",
2909
  "format": "f32-to-bf16",
 
2923
  {
2924
  "name": "model.layers.6.self_attn.qkv_proj.q_weight",
2925
  "shape": [
2926
+ 960,
2927
+ 72
2928
  ],
2929
  "dtype": "uint32",
2930
  "format": "f32-to-bf16",
 
2934
  {
2935
  "name": "model.layers.6.self_attn.qkv_proj.q_scale",
2936
  "shape": [
2937
+ 960,
2938
+ 18
2939
  ],
2940
  "dtype": "float16",
2941
  "format": "f32-to-bf16",
 
2945
  {
2946
  "name": "model.layers.6.self_attn.o_proj.q_weight",
2947
  "shape": [
2948
+ 576,
2949
+ 72
2950
  ],
2951
  "dtype": "uint32",
2952
  "format": "f32-to-bf16",
 
2956
  {
2957
  "name": "model.layers.6.self_attn.o_proj.q_scale",
2958
  "shape": [
2959
+ 576,
2960
+ 18
2961
  ],
2962
  "dtype": "float16",
2963
  "format": "f32-to-bf16",
 
2977
  {
2978
  "name": "model.layers.7.mlp.down_proj.q_weight",
2979
  "shape": [
2980
+ 576,
2981
+ 192
2982
  ],
2983
  "dtype": "uint32",
2984
  "format": "f32-to-bf16",
 
2988
  {
2989
  "name": "model.layers.7.mlp.down_proj.q_scale",
2990
  "shape": [
2991
+ 576,
2992
+ 48
2993
  ],
2994
  "dtype": "float16",
2995
  "format": "f32-to-bf16",
 
2999
  {
3000
  "name": "model.layers.7.mlp.gate_up_proj.q_weight",
3001
  "shape": [
3002
+ 3072,
3003
+ 72
3004
  ],
3005
  "dtype": "uint32",
3006
  "format": "f32-to-bf16",
 
3010
  {
3011
  "name": "model.layers.7.mlp.gate_up_proj.q_scale",
3012
  "shape": [
3013
+ 3072,
3014
+ 18
3015
  ],
3016
  "dtype": "float16",
3017
  "format": "f32-to-bf16",
 
3031
  {
3032
  "name": "model.layers.7.self_attn.qkv_proj.q_weight",
3033
  "shape": [
3034
+ 960,
3035
+ 72
3036
  ],
3037
  "dtype": "uint32",
3038
  "format": "f32-to-bf16",
 
3042
  {
3043
  "name": "model.layers.7.self_attn.qkv_proj.q_scale",
3044
  "shape": [
3045
+ 960,
3046
+ 18
3047
  ],
3048
  "dtype": "float16",
3049
  "format": "f32-to-bf16",
 
3053
  {
3054
  "name": "model.layers.7.self_attn.o_proj.q_weight",
3055
  "shape": [
3056
+ 576,
3057
+ 72
3058
  ],
3059
  "dtype": "uint32",
3060
  "format": "f32-to-bf16",
 
3064
  {
3065
  "name": "model.layers.7.self_attn.o_proj.q_scale",
3066
  "shape": [
3067
+ 576,
3068
+ 18
3069
  ],
3070
  "dtype": "float16",
3071
  "format": "f32-to-bf16",
 
3085
  {
3086
  "name": "model.layers.8.mlp.down_proj.q_weight",
3087
  "shape": [
3088
+ 576,
3089
+ 192
3090
  ],
3091
  "dtype": "uint32",
3092
  "format": "f32-to-bf16",
 
3096
  {
3097
  "name": "model.layers.8.mlp.down_proj.q_scale",
3098
  "shape": [
3099
+ 576,
3100
+ 48
3101
  ],
3102
  "dtype": "float16",
3103
  "format": "f32-to-bf16",
 
3107
  {
3108
  "name": "model.layers.8.mlp.gate_up_proj.q_weight",
3109
  "shape": [
3110
+ 3072,
3111
+ 72
3112
  ],
3113
  "dtype": "uint32",
3114
  "format": "f32-to-bf16",
 
3118
  {
3119
  "name": "model.layers.8.mlp.gate_up_proj.q_scale",
3120
  "shape": [
3121
+ 3072,
3122
+ 18
3123
  ],
3124
  "dtype": "float16",
3125
  "format": "f32-to-bf16",
 
3139
  {
3140
  "name": "model.layers.8.self_attn.qkv_proj.q_weight",
3141
  "shape": [
3142
+ 960,
3143
+ 72
3144
  ],
3145
  "dtype": "uint32",
3146
  "format": "f32-to-bf16",
 
3150
  {
3151
  "name": "model.layers.8.self_attn.qkv_proj.q_scale",
3152
  "shape": [
3153
+ 960,
3154
+ 18
3155
  ],
3156
  "dtype": "float16",
3157
  "format": "f32-to-bf16",
 
3161
  {
3162
  "name": "model.layers.8.self_attn.o_proj.q_weight",
3163
  "shape": [
3164
+ 576,
3165
+ 72
3166
  ],
3167
  "dtype": "uint32",
3168
  "format": "f32-to-bf16",
 
3172
  {
3173
  "name": "model.layers.8.self_attn.o_proj.q_scale",
3174
  "shape": [
3175
+ 576,
3176
+ 18
3177
  ],
3178
  "dtype": "float16",
3179
  "format": "f32-to-bf16",
 
3193
  {
3194
  "name": "model.layers.9.mlp.down_proj.q_weight",
3195
  "shape": [
3196
+ 576,
3197
+ 192
3198
  ],
3199
  "dtype": "uint32",
3200
  "format": "f32-to-bf16",
 
3204
  {
3205
  "name": "model.layers.9.mlp.down_proj.q_scale",
3206
  "shape": [
3207
+ 576,
3208
+ 48
3209
  ],
3210
  "dtype": "float16",
3211
  "format": "f32-to-bf16",
 
3215
  {
3216
  "name": "model.layers.9.mlp.gate_up_proj.q_weight",
3217
  "shape": [
3218
+ 3072,
3219
+ 72
3220
  ],
3221
  "dtype": "uint32",
3222
  "format": "f32-to-bf16",
 
3226
  {
3227
  "name": "model.layers.9.mlp.gate_up_proj.q_scale",
3228
  "shape": [
3229
+ 3072,
3230
+ 18
3231
  ],
3232
  "dtype": "float16",
3233
  "format": "f32-to-bf16",
 
3247
  {
3248
  "name": "model.layers.9.self_attn.qkv_proj.q_weight",
3249
  "shape": [
3250
+ 960,
3251
+ 72
3252
  ],
3253
  "dtype": "uint32",
3254
  "format": "f32-to-bf16",
 
3258
  {
3259
  "name": "model.layers.9.self_attn.qkv_proj.q_scale",
3260
  "shape": [
3261
+ 960,
3262
+ 18
3263
  ],
3264
  "dtype": "float16",
3265
  "format": "f32-to-bf16",
 
3269
  {
3270
  "name": "model.layers.9.self_attn.o_proj.q_weight",
3271
  "shape": [
3272
+ 576,
3273
+ 72
3274
  ],
3275
  "dtype": "uint32",
3276
  "format": "f32-to-bf16",
 
3280
  {
3281
  "name": "model.layers.9.self_attn.o_proj.q_scale",
3282
  "shape": [
3283
+ 576,
3284
+ 18
3285
  ],
3286
  "dtype": "float16",
3287
  "format": "f32-to-bf16",
 
3299
  "byteOffset": 9465984
3300
  }
3301
  ],
3302
+ "md5sum": "c3261fefb84bf0a587f6b809c586f774"
3303
  }
3304
  ]
3305
  }
params_shard_0.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a23c73b1da9cbc5df4ff85a354c18cc2414cc8da068bb1d595d3c6c95bd4d8f2
3
  size 33364548
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a23cedd1d63be5a27a9eac044e9eb9c37119da510f85f34636fefcfbb498815
3
  size 33364548
params_shard_1.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f429951e7f03ae96fd4e12f8c932659a21be30dbda93f3e32e1867d9ff0ec4b
3
  size 32883840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b54c0abc4d7bd83c814daeae4dde2ce272c5fe19f8ca2c0477e635159e3b2be9
3
  size 32883840
params_shard_2.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8aaa3f1010ae1592f6d9ec1ad198a25881ef1256546572b40bcb962377b8c482
3
  size 9467136
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:452584b4734e87b7e108b734e61a81d9af433d38bdc76be757fc02e068be3c18
3
  size 9467136