Pearush committed on
Commit 48a0ac9
1 Parent(s): fcb25d4

Upload PhiMoEForCausalLM

config.json CHANGED
@@ -18,7 +18,7 @@
  "input_jitter_noise": 0.01,
  "intermediate_size": 2240,
  "lm_head_bias": true,
- "max_position_embeddings": 131072,
+ "max_position_embeddings": 4096,
  "model_type": "phimoe",
  "num_attention_heads": 32,
  "num_experts_per_tok": 2,
@@ -28,148 +28,11 @@
  "original_max_position_embeddings": 4096,
  "output_router_logits": false,
  "rms_norm_eps": 1e-05,
- "rope_scaling": {
- "long_factor": [
- 1.0199999809265137,
- 1.0299999713897705,
- 1.0399999618530273,
- 1.0499999523162842,
- 1.0499999523162842,
- 1.0499999523162842,
- 1.059999942779541,
- 1.059999942779541,
- 1.059999942779541,
- 1.059999942779541,
- 1.059999942779541,
- 1.059999942779541,
- 1.0999999046325684,
- 1.1799999475479126,
- 1.1799999475479126,
- 1.3700000047683716,
- 1.4899998903274536,
- 2.109999895095825,
- 2.8899998664855957,
- 3.9499998092651367,
- 4.299999713897705,
- 6.429999828338623,
- 8.09000015258789,
- 10.690000534057617,
- 12.050000190734863,
- 18.229999542236328,
- 18.84000015258789,
- 19.899999618530273,
- 21.420000076293945,
- 26.200000762939453,
- 34.28000259399414,
- 34.590003967285156,
- 38.730003356933594,
- 40.22000503540039,
- 42.54000473022461,
- 44.000003814697266,
- 47.590003967285156,
- 54.750003814697266,
- 56.19000244140625,
- 57.44000244140625,
- 57.4900016784668,
- 61.20000076293945,
- 61.540000915527344,
- 61.75,
- 61.779998779296875,
- 62.06999969482422,
- 63.11000061035156,
- 63.43000030517578,
- 63.560001373291016,
- 63.71000289916992,
- 63.92000198364258,
- 63.94000244140625,
- 63.94000244140625,
- 63.96000289916992,
- 63.980003356933594,
- 64.0300064086914,
- 64.0300064086914,
- 64.0300064086914,
- 64.04000854492188,
- 64.10000610351562,
- 64.19000244140625,
- 64.20999908447266,
- 64.75,
- 64.95999908447266
- ],
- "long_mscale": 1.243163121016122,
- "original_max_position_embeddings": 4096,
- "short_factor": [
- 1.0,
- 1.0399999618530273,
- 1.0399999618530273,
- 1.0399999618530273,
- 1.0499999523162842,
- 1.0499999523162842,
- 1.0499999523162842,
- 1.0499999523162842,
- 1.0499999523162842,
- 1.0499999523162842,
- 1.0499999523162842,
- 1.0499999523162842,
- 1.0499999523162842,
- 1.0499999523162842,
- 1.059999942779541,
- 1.059999942779541,
- 1.0699999332427979,
- 1.0699999332427979,
- 1.0699999332427979,
- 1.0699999332427979,
- 1.1399999856948853,
- 1.159999966621399,
- 1.159999966621399,
- 1.159999966621399,
- 1.159999966621399,
- 1.1799999475479126,
- 1.1999999284744263,
- 1.3199999332427979,
- 1.3399999141693115,
- 1.3499999046325684,
- 1.3999998569488525,
- 1.4799998998641968,
- 1.4999998807907104,
- 1.589999794960022,
- 1.6499998569488525,
- 1.71999990940094,
- 1.8999998569488525,
- 1.9099998474121094,
- 1.9099998474121094,
- 1.9899998903274536,
- 1.9999998807907104,
- 1.9999998807907104,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.009999990463257,
- 2.0999999046325684,
- 2.319999933242798,
- 2.419999837875366,
- 2.5899999141693115,
- 2.7899999618530273
- ],
- "short_mscale": 1.243163121016122,
- "type": "longrope"
- },
+ "rope_scaling": null,
  "rope_theta": 10000.0,
  "router_aux_loss_coef": 0.0,
  "router_jitter_noise": 0.01,
- "sliding_window": 131072,
+ "sliding_window": 4096,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.41.2",
model-00001-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:586ab060777d7bcf7a53abe5b0cbf55b1716a0b14386ef015ae87294e74d8134
+ oid sha256:a623fee51e22a18b446a2885badaf62a4dbdf49325a61ff643be853c1c3c02a1
  size 4995293984
model-00002-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e5e79658051a9b544795dab8a2c627c1400e4899a28852e8c75e9ef697120f8e
+ oid sha256:83c51b75c485019b43c73e3fea48fd558f8b2f4af3349170f158c3c0853052e3
  size 4966538712
model-00003-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:484ccc0dbc07cf3a7b70a8a4c48518cfd1f2bc88860b845e024ff488ebbb7de2
+ oid sha256:48e1b8ceede8d80f965076e366c421e07f48fa58ba7177ca963b26c4c4d430e8
  size 4986555656
model-00004-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:5ffd5ce49775737d361803d3ea853d8dfe60f3546299e81d5d743c4d794143c7
+ oid sha256:2d5197aadddd4601801ae608b8faa458ce811740b70f4a9519e82b6faaad24e9
  size 4989562032
model-00005-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:ca7c3d7023cee93f1da3947fdbf40ee5472b780be7b8867988fb8fc01f13dd36
+ oid sha256:9a6ac6179b129441fcf91f3766a96b06756aa0caf9bc1ded2849a39578fc3f6e
  size 4989562032
model-00006-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:575b96d6d469ea226ff25bef40082dcb48ca526c26670eefaa14df9e76ce7785
+ oid sha256:821c6a3dec0728283a595299988ff6910d364ea9aa5fbb951d47f99690620143
  size 4989562040
model-00007-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f148de3a6c7f8c8bf7d453a7d18a039a740a1ce57da188226c3ccc3acfc4219b
+ oid sha256:cfdd16e74d2b4fc03ed66ada75a375cba0c3d5d975c2eaf79d1abf3116a0a791
  size 1484567336
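Each safetensors entry above is a Git LFS pointer file: the oid sha256 line is the SHA-256 digest of the shard the pointer resolves to, and size is its byte count. A minimal sketch for checking a locally downloaded shard against the new pointer for model-00001-of-00007.safetensors; the local path is an assumption, so adjust it to wherever the files were fetched.

```python
import hashlib
from pathlib import Path

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream the file through SHA-256 in 1 MiB chunks and return the hex digest."""
    digest = hashlib.sha256()
    with path.open("rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Hypothetical local path; point this at the downloaded shard.
shard = Path("model-00001-of-00007.safetensors")

# Expected values taken from the new LFS pointer above.
expected_oid = "a623fee51e22a18b446a2885badaf62a4dbdf49325a61ff643be853c1c3c02a1"
expected_size = 4995293984

assert shard.stat().st_size == expected_size, "size does not match the LFS pointer"
assert sha256_of(shard) == expected_oid, "digest does not match the LFS pointer"
print("shard matches the committed pointer")
```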