mingdali commited on
Commit
4f8bdcf
·
verified ·
1 Parent(s): 08c9928

Upload 3 files

Browse files
Files changed (3) hide show
  1. pytorch_model.bin.index.json +871 -0
  2. tokenization_qwen.py +593 -0
  3. visual.py +482 -0
pytorch_model.bin.index.json ADDED
@@ -0,0 +1,871 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 19465979392
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "pytorch_model-00002-of-00002.bin",
7
+ "transformer.h.0.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
8
+ "transformer.h.0.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
9
+ "transformer.h.0.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
10
+ "transformer.h.0.ln_1.weight": "pytorch_model-00001-of-00002.bin",
11
+ "transformer.h.0.ln_2.weight": "pytorch_model-00001-of-00002.bin",
12
+ "transformer.h.0.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
13
+ "transformer.h.0.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
14
+ "transformer.h.0.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
15
+ "transformer.h.1.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
16
+ "transformer.h.1.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
17
+ "transformer.h.1.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
18
+ "transformer.h.1.ln_1.weight": "pytorch_model-00001-of-00002.bin",
19
+ "transformer.h.1.ln_2.weight": "pytorch_model-00001-of-00002.bin",
20
+ "transformer.h.1.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
21
+ "transformer.h.1.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
22
+ "transformer.h.1.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
23
+ "transformer.h.10.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
24
+ "transformer.h.10.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
25
+ "transformer.h.10.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
26
+ "transformer.h.10.ln_1.weight": "pytorch_model-00001-of-00002.bin",
27
+ "transformer.h.10.ln_2.weight": "pytorch_model-00001-of-00002.bin",
28
+ "transformer.h.10.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
29
+ "transformer.h.10.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
30
+ "transformer.h.10.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
31
+ "transformer.h.11.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
32
+ "transformer.h.11.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
33
+ "transformer.h.11.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
34
+ "transformer.h.11.ln_1.weight": "pytorch_model-00001-of-00002.bin",
35
+ "transformer.h.11.ln_2.weight": "pytorch_model-00001-of-00002.bin",
36
+ "transformer.h.11.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
37
+ "transformer.h.11.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
38
+ "transformer.h.11.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
39
+ "transformer.h.12.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
40
+ "transformer.h.12.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
41
+ "transformer.h.12.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
42
+ "transformer.h.12.ln_1.weight": "pytorch_model-00001-of-00002.bin",
43
+ "transformer.h.12.ln_2.weight": "pytorch_model-00001-of-00002.bin",
44
+ "transformer.h.12.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
45
+ "transformer.h.12.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
46
+ "transformer.h.12.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
47
+ "transformer.h.13.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
48
+ "transformer.h.13.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
49
+ "transformer.h.13.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
50
+ "transformer.h.13.ln_1.weight": "pytorch_model-00001-of-00002.bin",
51
+ "transformer.h.13.ln_2.weight": "pytorch_model-00001-of-00002.bin",
52
+ "transformer.h.13.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
53
+ "transformer.h.13.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
54
+ "transformer.h.13.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
55
+ "transformer.h.14.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
56
+ "transformer.h.14.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
57
+ "transformer.h.14.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
58
+ "transformer.h.14.ln_1.weight": "pytorch_model-00001-of-00002.bin",
59
+ "transformer.h.14.ln_2.weight": "pytorch_model-00001-of-00002.bin",
60
+ "transformer.h.14.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
61
+ "transformer.h.14.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
62
+ "transformer.h.14.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
63
+ "transformer.h.15.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
64
+ "transformer.h.15.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
65
+ "transformer.h.15.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
66
+ "transformer.h.15.ln_1.weight": "pytorch_model-00001-of-00002.bin",
67
+ "transformer.h.15.ln_2.weight": "pytorch_model-00001-of-00002.bin",
68
+ "transformer.h.15.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
69
+ "transformer.h.15.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
70
+ "transformer.h.15.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
71
+ "transformer.h.16.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
72
+ "transformer.h.16.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
73
+ "transformer.h.16.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
74
+ "transformer.h.16.ln_1.weight": "pytorch_model-00001-of-00002.bin",
75
+ "transformer.h.16.ln_2.weight": "pytorch_model-00001-of-00002.bin",
76
+ "transformer.h.16.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
77
+ "transformer.h.16.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
78
+ "transformer.h.16.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
79
+ "transformer.h.17.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
80
+ "transformer.h.17.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
81
+ "transformer.h.17.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
82
+ "transformer.h.17.ln_1.weight": "pytorch_model-00001-of-00002.bin",
83
+ "transformer.h.17.ln_2.weight": "pytorch_model-00001-of-00002.bin",
84
+ "transformer.h.17.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
85
+ "transformer.h.17.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
86
+ "transformer.h.17.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
87
+ "transformer.h.18.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
88
+ "transformer.h.18.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
89
+ "transformer.h.18.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
90
+ "transformer.h.18.ln_1.weight": "pytorch_model-00001-of-00002.bin",
91
+ "transformer.h.18.ln_2.weight": "pytorch_model-00001-of-00002.bin",
92
+ "transformer.h.18.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
93
+ "transformer.h.18.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
94
+ "transformer.h.18.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
95
+ "transformer.h.19.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
96
+ "transformer.h.19.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
97
+ "transformer.h.19.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
98
+ "transformer.h.19.ln_1.weight": "pytorch_model-00001-of-00002.bin",
99
+ "transformer.h.19.ln_2.weight": "pytorch_model-00001-of-00002.bin",
100
+ "transformer.h.19.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
101
+ "transformer.h.19.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
102
+ "transformer.h.19.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
103
+ "transformer.h.2.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
104
+ "transformer.h.2.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
105
+ "transformer.h.2.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
106
+ "transformer.h.2.ln_1.weight": "pytorch_model-00001-of-00002.bin",
107
+ "transformer.h.2.ln_2.weight": "pytorch_model-00001-of-00002.bin",
108
+ "transformer.h.2.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
109
+ "transformer.h.2.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
110
+ "transformer.h.2.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
111
+ "transformer.h.20.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
112
+ "transformer.h.20.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
113
+ "transformer.h.20.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
114
+ "transformer.h.20.ln_1.weight": "pytorch_model-00001-of-00002.bin",
115
+ "transformer.h.20.ln_2.weight": "pytorch_model-00001-of-00002.bin",
116
+ "transformer.h.20.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
117
+ "transformer.h.20.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
118
+ "transformer.h.20.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
119
+ "transformer.h.21.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
120
+ "transformer.h.21.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
121
+ "transformer.h.21.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
122
+ "transformer.h.21.ln_1.weight": "pytorch_model-00001-of-00002.bin",
123
+ "transformer.h.21.ln_2.weight": "pytorch_model-00001-of-00002.bin",
124
+ "transformer.h.21.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
125
+ "transformer.h.21.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
126
+ "transformer.h.21.mlp.w2.weight": "pytorch_model-00002-of-00002.bin",
127
+ "transformer.h.22.attn.c_attn.bias": "pytorch_model-00002-of-00002.bin",
128
+ "transformer.h.22.attn.c_attn.weight": "pytorch_model-00002-of-00002.bin",
129
+ "transformer.h.22.attn.c_proj.weight": "pytorch_model-00002-of-00002.bin",
130
+ "transformer.h.22.ln_1.weight": "pytorch_model-00002-of-00002.bin",
131
+ "transformer.h.22.ln_2.weight": "pytorch_model-00002-of-00002.bin",
132
+ "transformer.h.22.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
133
+ "transformer.h.22.mlp.w1.weight": "pytorch_model-00002-of-00002.bin",
134
+ "transformer.h.22.mlp.w2.weight": "pytorch_model-00002-of-00002.bin",
135
+ "transformer.h.23.attn.c_attn.bias": "pytorch_model-00002-of-00002.bin",
136
+ "transformer.h.23.attn.c_attn.weight": "pytorch_model-00002-of-00002.bin",
137
+ "transformer.h.23.attn.c_proj.weight": "pytorch_model-00002-of-00002.bin",
138
+ "transformer.h.23.ln_1.weight": "pytorch_model-00002-of-00002.bin",
139
+ "transformer.h.23.ln_2.weight": "pytorch_model-00002-of-00002.bin",
140
+ "transformer.h.23.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
141
+ "transformer.h.23.mlp.w1.weight": "pytorch_model-00002-of-00002.bin",
142
+ "transformer.h.23.mlp.w2.weight": "pytorch_model-00002-of-00002.bin",
143
+ "transformer.h.24.attn.c_attn.bias": "pytorch_model-00002-of-00002.bin",
144
+ "transformer.h.24.attn.c_attn.weight": "pytorch_model-00002-of-00002.bin",
145
+ "transformer.h.24.attn.c_proj.weight": "pytorch_model-00002-of-00002.bin",
146
+ "transformer.h.24.ln_1.weight": "pytorch_model-00002-of-00002.bin",
147
+ "transformer.h.24.ln_2.weight": "pytorch_model-00002-of-00002.bin",
148
+ "transformer.h.24.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
149
+ "transformer.h.24.mlp.w1.weight": "pytorch_model-00002-of-00002.bin",
150
+ "transformer.h.24.mlp.w2.weight": "pytorch_model-00002-of-00002.bin",
151
+ "transformer.h.25.attn.c_attn.bias": "pytorch_model-00002-of-00002.bin",
152
+ "transformer.h.25.attn.c_attn.weight": "pytorch_model-00002-of-00002.bin",
153
+ "transformer.h.25.attn.c_proj.weight": "pytorch_model-00002-of-00002.bin",
154
+ "transformer.h.25.ln_1.weight": "pytorch_model-00002-of-00002.bin",
155
+ "transformer.h.25.ln_2.weight": "pytorch_model-00002-of-00002.bin",
156
+ "transformer.h.25.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
157
+ "transformer.h.25.mlp.w1.weight": "pytorch_model-00002-of-00002.bin",
158
+ "transformer.h.25.mlp.w2.weight": "pytorch_model-00002-of-00002.bin",
159
+ "transformer.h.26.attn.c_attn.bias": "pytorch_model-00002-of-00002.bin",
160
+ "transformer.h.26.attn.c_attn.weight": "pytorch_model-00002-of-00002.bin",
161
+ "transformer.h.26.attn.c_proj.weight": "pytorch_model-00002-of-00002.bin",
162
+ "transformer.h.26.ln_1.weight": "pytorch_model-00002-of-00002.bin",
163
+ "transformer.h.26.ln_2.weight": "pytorch_model-00002-of-00002.bin",
164
+ "transformer.h.26.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
165
+ "transformer.h.26.mlp.w1.weight": "pytorch_model-00002-of-00002.bin",
166
+ "transformer.h.26.mlp.w2.weight": "pytorch_model-00002-of-00002.bin",
167
+ "transformer.h.27.attn.c_attn.bias": "pytorch_model-00002-of-00002.bin",
168
+ "transformer.h.27.attn.c_attn.weight": "pytorch_model-00002-of-00002.bin",
169
+ "transformer.h.27.attn.c_proj.weight": "pytorch_model-00002-of-00002.bin",
170
+ "transformer.h.27.ln_1.weight": "pytorch_model-00002-of-00002.bin",
171
+ "transformer.h.27.ln_2.weight": "pytorch_model-00002-of-00002.bin",
172
+ "transformer.h.27.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
173
+ "transformer.h.27.mlp.w1.weight": "pytorch_model-00002-of-00002.bin",
174
+ "transformer.h.27.mlp.w2.weight": "pytorch_model-00002-of-00002.bin",
175
+ "transformer.h.28.attn.c_attn.bias": "pytorch_model-00002-of-00002.bin",
176
+ "transformer.h.28.attn.c_attn.weight": "pytorch_model-00002-of-00002.bin",
177
+ "transformer.h.28.attn.c_proj.weight": "pytorch_model-00002-of-00002.bin",
178
+ "transformer.h.28.ln_1.weight": "pytorch_model-00002-of-00002.bin",
179
+ "transformer.h.28.ln_2.weight": "pytorch_model-00002-of-00002.bin",
180
+ "transformer.h.28.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
181
+ "transformer.h.28.mlp.w1.weight": "pytorch_model-00002-of-00002.bin",
182
+ "transformer.h.28.mlp.w2.weight": "pytorch_model-00002-of-00002.bin",
183
+ "transformer.h.29.attn.c_attn.bias": "pytorch_model-00002-of-00002.bin",
184
+ "transformer.h.29.attn.c_attn.weight": "pytorch_model-00002-of-00002.bin",
185
+ "transformer.h.29.attn.c_proj.weight": "pytorch_model-00002-of-00002.bin",
186
+ "transformer.h.29.ln_1.weight": "pytorch_model-00002-of-00002.bin",
187
+ "transformer.h.29.ln_2.weight": "pytorch_model-00002-of-00002.bin",
188
+ "transformer.h.29.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
189
+ "transformer.h.29.mlp.w1.weight": "pytorch_model-00002-of-00002.bin",
190
+ "transformer.h.29.mlp.w2.weight": "pytorch_model-00002-of-00002.bin",
191
+ "transformer.h.3.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
192
+ "transformer.h.3.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
193
+ "transformer.h.3.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
194
+ "transformer.h.3.ln_1.weight": "pytorch_model-00001-of-00002.bin",
195
+ "transformer.h.3.ln_2.weight": "pytorch_model-00001-of-00002.bin",
196
+ "transformer.h.3.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
197
+ "transformer.h.3.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
198
+ "transformer.h.3.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
199
+ "transformer.h.30.attn.c_attn.bias": "pytorch_model-00002-of-00002.bin",
200
+ "transformer.h.30.attn.c_attn.weight": "pytorch_model-00002-of-00002.bin",
201
+ "transformer.h.30.attn.c_proj.weight": "pytorch_model-00002-of-00002.bin",
202
+ "transformer.h.30.ln_1.weight": "pytorch_model-00002-of-00002.bin",
203
+ "transformer.h.30.ln_2.weight": "pytorch_model-00002-of-00002.bin",
204
+ "transformer.h.30.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
205
+ "transformer.h.30.mlp.w1.weight": "pytorch_model-00002-of-00002.bin",
206
+ "transformer.h.30.mlp.w2.weight": "pytorch_model-00002-of-00002.bin",
207
+ "transformer.h.31.attn.c_attn.bias": "pytorch_model-00002-of-00002.bin",
208
+ "transformer.h.31.attn.c_attn.weight": "pytorch_model-00002-of-00002.bin",
209
+ "transformer.h.31.attn.c_proj.weight": "pytorch_model-00002-of-00002.bin",
210
+ "transformer.h.31.ln_1.weight": "pytorch_model-00002-of-00002.bin",
211
+ "transformer.h.31.ln_2.weight": "pytorch_model-00002-of-00002.bin",
212
+ "transformer.h.31.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
213
+ "transformer.h.31.mlp.w1.weight": "pytorch_model-00002-of-00002.bin",
214
+ "transformer.h.31.mlp.w2.weight": "pytorch_model-00002-of-00002.bin",
215
+ "transformer.h.4.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
216
+ "transformer.h.4.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
217
+ "transformer.h.4.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
218
+ "transformer.h.4.ln_1.weight": "pytorch_model-00001-of-00002.bin",
219
+ "transformer.h.4.ln_2.weight": "pytorch_model-00001-of-00002.bin",
220
+ "transformer.h.4.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
221
+ "transformer.h.4.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
222
+ "transformer.h.4.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
223
+ "transformer.h.5.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
224
+ "transformer.h.5.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
225
+ "transformer.h.5.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
226
+ "transformer.h.5.ln_1.weight": "pytorch_model-00001-of-00002.bin",
227
+ "transformer.h.5.ln_2.weight": "pytorch_model-00001-of-00002.bin",
228
+ "transformer.h.5.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
229
+ "transformer.h.5.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
230
+ "transformer.h.5.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
231
+ "transformer.h.6.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
232
+ "transformer.h.6.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
233
+ "transformer.h.6.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
234
+ "transformer.h.6.ln_1.weight": "pytorch_model-00001-of-00002.bin",
235
+ "transformer.h.6.ln_2.weight": "pytorch_model-00001-of-00002.bin",
236
+ "transformer.h.6.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
237
+ "transformer.h.6.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
238
+ "transformer.h.6.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
239
+ "transformer.h.7.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
240
+ "transformer.h.7.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
241
+ "transformer.h.7.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
242
+ "transformer.h.7.ln_1.weight": "pytorch_model-00001-of-00002.bin",
243
+ "transformer.h.7.ln_2.weight": "pytorch_model-00001-of-00002.bin",
244
+ "transformer.h.7.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
245
+ "transformer.h.7.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
246
+ "transformer.h.7.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
247
+ "transformer.h.8.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
248
+ "transformer.h.8.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
249
+ "transformer.h.8.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
250
+ "transformer.h.8.ln_1.weight": "pytorch_model-00001-of-00002.bin",
251
+ "transformer.h.8.ln_2.weight": "pytorch_model-00001-of-00002.bin",
252
+ "transformer.h.8.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
253
+ "transformer.h.8.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
254
+ "transformer.h.8.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
255
+ "transformer.h.9.attn.c_attn.bias": "pytorch_model-00001-of-00002.bin",
256
+ "transformer.h.9.attn.c_attn.weight": "pytorch_model-00001-of-00002.bin",
257
+ "transformer.h.9.attn.c_proj.weight": "pytorch_model-00001-of-00002.bin",
258
+ "transformer.h.9.ln_1.weight": "pytorch_model-00001-of-00002.bin",
259
+ "transformer.h.9.ln_2.weight": "pytorch_model-00001-of-00002.bin",
260
+ "transformer.h.9.mlp.c_proj.weight": "pytorch_model-00001-of-00002.bin",
261
+ "transformer.h.9.mlp.w1.weight": "pytorch_model-00001-of-00002.bin",
262
+ "transformer.h.9.mlp.w2.weight": "pytorch_model-00001-of-00002.bin",
263
+ "transformer.ln_f.weight": "pytorch_model-00002-of-00002.bin",
264
+ "transformer.visual.attn_pool.attn.in_proj_bias": "pytorch_model-00002-of-00002.bin",
265
+ "transformer.visual.attn_pool.attn.in_proj_weight": "pytorch_model-00002-of-00002.bin",
266
+ "transformer.visual.attn_pool.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
267
+ "transformer.visual.attn_pool.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
268
+ "transformer.visual.attn_pool.kv_proj.weight": "pytorch_model-00002-of-00002.bin",
269
+ "transformer.visual.attn_pool.ln_kv.bias": "pytorch_model-00002-of-00002.bin",
270
+ "transformer.visual.attn_pool.ln_kv.weight": "pytorch_model-00002-of-00002.bin",
271
+ "transformer.visual.attn_pool.ln_q.bias": "pytorch_model-00002-of-00002.bin",
272
+ "transformer.visual.attn_pool.ln_q.weight": "pytorch_model-00002-of-00002.bin",
273
+ "transformer.visual.attn_pool.pos_embed": "pytorch_model-00002-of-00002.bin",
274
+ "transformer.visual.attn_pool.query": "pytorch_model-00002-of-00002.bin",
275
+ "transformer.visual.attn_pool2.attn.in_proj_bias": "pytorch_model-00002-of-00002.bin",
276
+ "transformer.visual.attn_pool2.attn.in_proj_weight": "pytorch_model-00002-of-00002.bin",
277
+ "transformer.visual.attn_pool2.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
278
+ "transformer.visual.attn_pool2.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
279
+ "transformer.visual.attn_pool2.kv_proj.weight": "pytorch_model-00002-of-00002.bin",
280
+ "transformer.visual.attn_pool2.ln_kv.bias": "pytorch_model-00002-of-00002.bin",
281
+ "transformer.visual.attn_pool2.ln_kv.weight": "pytorch_model-00002-of-00002.bin",
282
+ "transformer.visual.attn_pool2.ln_q.bias": "pytorch_model-00002-of-00002.bin",
283
+ "transformer.visual.attn_pool2.ln_q.weight": "pytorch_model-00002-of-00002.bin",
284
+ "transformer.visual.attn_pool2.pos_embed": "pytorch_model-00002-of-00002.bin",
285
+ "transformer.visual.attn_pool2.query": "pytorch_model-00002-of-00002.bin",
286
+ "transformer.visual.conv1.weight": "pytorch_model-00002-of-00002.bin",
287
+ "transformer.visual.ln_post.bias": "pytorch_model-00002-of-00002.bin",
288
+ "transformer.visual.ln_post.weight": "pytorch_model-00002-of-00002.bin",
289
+ "transformer.visual.ln_pre.bias": "pytorch_model-00002-of-00002.bin",
290
+ "transformer.visual.ln_pre.weight": "pytorch_model-00002-of-00002.bin",
291
+ "transformer.visual.positional_embedding": "pytorch_model-00002-of-00002.bin",
292
+ "transformer.visual.proj": "pytorch_model-00002-of-00002.bin",
293
+ "transformer.visual.transformer.resblocks.0.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
294
+ "transformer.visual.transformer.resblocks.0.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
295
+ "transformer.visual.transformer.resblocks.0.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
296
+ "transformer.visual.transformer.resblocks.0.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
297
+ "transformer.visual.transformer.resblocks.0.ln_1.bias": "pytorch_model-00002-of-00002.bin",
298
+ "transformer.visual.transformer.resblocks.0.ln_1.weight": "pytorch_model-00002-of-00002.bin",
299
+ "transformer.visual.transformer.resblocks.0.ln_2.bias": "pytorch_model-00002-of-00002.bin",
300
+ "transformer.visual.transformer.resblocks.0.ln_2.weight": "pytorch_model-00002-of-00002.bin",
301
+ "transformer.visual.transformer.resblocks.0.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
302
+ "transformer.visual.transformer.resblocks.0.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
303
+ "transformer.visual.transformer.resblocks.0.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
304
+ "transformer.visual.transformer.resblocks.0.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
305
+ "transformer.visual.transformer.resblocks.1.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
306
+ "transformer.visual.transformer.resblocks.1.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
307
+ "transformer.visual.transformer.resblocks.1.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
308
+ "transformer.visual.transformer.resblocks.1.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
309
+ "transformer.visual.transformer.resblocks.1.ln_1.bias": "pytorch_model-00002-of-00002.bin",
310
+ "transformer.visual.transformer.resblocks.1.ln_1.weight": "pytorch_model-00002-of-00002.bin",
311
+ "transformer.visual.transformer.resblocks.1.ln_2.bias": "pytorch_model-00002-of-00002.bin",
312
+ "transformer.visual.transformer.resblocks.1.ln_2.weight": "pytorch_model-00002-of-00002.bin",
313
+ "transformer.visual.transformer.resblocks.1.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
314
+ "transformer.visual.transformer.resblocks.1.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
315
+ "transformer.visual.transformer.resblocks.1.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
316
+ "transformer.visual.transformer.resblocks.1.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
317
+ "transformer.visual.transformer.resblocks.10.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
318
+ "transformer.visual.transformer.resblocks.10.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
319
+ "transformer.visual.transformer.resblocks.10.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
320
+ "transformer.visual.transformer.resblocks.10.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
321
+ "transformer.visual.transformer.resblocks.10.ln_1.bias": "pytorch_model-00002-of-00002.bin",
322
+ "transformer.visual.transformer.resblocks.10.ln_1.weight": "pytorch_model-00002-of-00002.bin",
323
+ "transformer.visual.transformer.resblocks.10.ln_2.bias": "pytorch_model-00002-of-00002.bin",
324
+ "transformer.visual.transformer.resblocks.10.ln_2.weight": "pytorch_model-00002-of-00002.bin",
325
+ "transformer.visual.transformer.resblocks.10.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
326
+ "transformer.visual.transformer.resblocks.10.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
327
+ "transformer.visual.transformer.resblocks.10.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
328
+ "transformer.visual.transformer.resblocks.10.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
329
+ "transformer.visual.transformer.resblocks.11.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
330
+ "transformer.visual.transformer.resblocks.11.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
331
+ "transformer.visual.transformer.resblocks.11.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
332
+ "transformer.visual.transformer.resblocks.11.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
333
+ "transformer.visual.transformer.resblocks.11.ln_1.bias": "pytorch_model-00002-of-00002.bin",
334
+ "transformer.visual.transformer.resblocks.11.ln_1.weight": "pytorch_model-00002-of-00002.bin",
335
+ "transformer.visual.transformer.resblocks.11.ln_2.bias": "pytorch_model-00002-of-00002.bin",
336
+ "transformer.visual.transformer.resblocks.11.ln_2.weight": "pytorch_model-00002-of-00002.bin",
337
+ "transformer.visual.transformer.resblocks.11.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
338
+ "transformer.visual.transformer.resblocks.11.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
339
+ "transformer.visual.transformer.resblocks.11.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
340
+ "transformer.visual.transformer.resblocks.11.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
341
+ "transformer.visual.transformer.resblocks.12.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
342
+ "transformer.visual.transformer.resblocks.12.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
343
+ "transformer.visual.transformer.resblocks.12.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
344
+ "transformer.visual.transformer.resblocks.12.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
345
+ "transformer.visual.transformer.resblocks.12.ln_1.bias": "pytorch_model-00002-of-00002.bin",
346
+ "transformer.visual.transformer.resblocks.12.ln_1.weight": "pytorch_model-00002-of-00002.bin",
347
+ "transformer.visual.transformer.resblocks.12.ln_2.bias": "pytorch_model-00002-of-00002.bin",
348
+ "transformer.visual.transformer.resblocks.12.ln_2.weight": "pytorch_model-00002-of-00002.bin",
349
+ "transformer.visual.transformer.resblocks.12.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
350
+ "transformer.visual.transformer.resblocks.12.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
351
+ "transformer.visual.transformer.resblocks.12.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
352
+ "transformer.visual.transformer.resblocks.12.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
353
+ "transformer.visual.transformer.resblocks.13.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
354
+ "transformer.visual.transformer.resblocks.13.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
355
+ "transformer.visual.transformer.resblocks.13.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
356
+ "transformer.visual.transformer.resblocks.13.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
357
+ "transformer.visual.transformer.resblocks.13.ln_1.bias": "pytorch_model-00002-of-00002.bin",
358
+ "transformer.visual.transformer.resblocks.13.ln_1.weight": "pytorch_model-00002-of-00002.bin",
359
+ "transformer.visual.transformer.resblocks.13.ln_2.bias": "pytorch_model-00002-of-00002.bin",
360
+ "transformer.visual.transformer.resblocks.13.ln_2.weight": "pytorch_model-00002-of-00002.bin",
361
+ "transformer.visual.transformer.resblocks.13.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
362
+ "transformer.visual.transformer.resblocks.13.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
363
+ "transformer.visual.transformer.resblocks.13.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
364
+ "transformer.visual.transformer.resblocks.13.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
365
+ "transformer.visual.transformer.resblocks.14.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
366
+ "transformer.visual.transformer.resblocks.14.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
367
+ "transformer.visual.transformer.resblocks.14.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
368
+ "transformer.visual.transformer.resblocks.14.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
369
+ "transformer.visual.transformer.resblocks.14.ln_1.bias": "pytorch_model-00002-of-00002.bin",
370
+ "transformer.visual.transformer.resblocks.14.ln_1.weight": "pytorch_model-00002-of-00002.bin",
371
+ "transformer.visual.transformer.resblocks.14.ln_2.bias": "pytorch_model-00002-of-00002.bin",
372
+ "transformer.visual.transformer.resblocks.14.ln_2.weight": "pytorch_model-00002-of-00002.bin",
373
+ "transformer.visual.transformer.resblocks.14.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
374
+ "transformer.visual.transformer.resblocks.14.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
375
+ "transformer.visual.transformer.resblocks.14.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
376
+ "transformer.visual.transformer.resblocks.14.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
377
+ "transformer.visual.transformer.resblocks.15.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
378
+ "transformer.visual.transformer.resblocks.15.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
379
+ "transformer.visual.transformer.resblocks.15.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
380
+ "transformer.visual.transformer.resblocks.15.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
381
+ "transformer.visual.transformer.resblocks.15.ln_1.bias": "pytorch_model-00002-of-00002.bin",
382
+ "transformer.visual.transformer.resblocks.15.ln_1.weight": "pytorch_model-00002-of-00002.bin",
383
+ "transformer.visual.transformer.resblocks.15.ln_2.bias": "pytorch_model-00002-of-00002.bin",
384
+ "transformer.visual.transformer.resblocks.15.ln_2.weight": "pytorch_model-00002-of-00002.bin",
385
+ "transformer.visual.transformer.resblocks.15.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
386
+ "transformer.visual.transformer.resblocks.15.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
387
+ "transformer.visual.transformer.resblocks.15.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
388
+ "transformer.visual.transformer.resblocks.15.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
389
+ "transformer.visual.transformer.resblocks.16.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
390
+ "transformer.visual.transformer.resblocks.16.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
391
+ "transformer.visual.transformer.resblocks.16.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
392
+ "transformer.visual.transformer.resblocks.16.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
393
+ "transformer.visual.transformer.resblocks.16.ln_1.bias": "pytorch_model-00002-of-00002.bin",
394
+ "transformer.visual.transformer.resblocks.16.ln_1.weight": "pytorch_model-00002-of-00002.bin",
395
+ "transformer.visual.transformer.resblocks.16.ln_2.bias": "pytorch_model-00002-of-00002.bin",
396
+ "transformer.visual.transformer.resblocks.16.ln_2.weight": "pytorch_model-00002-of-00002.bin",
397
+ "transformer.visual.transformer.resblocks.16.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
398
+ "transformer.visual.transformer.resblocks.16.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
399
+ "transformer.visual.transformer.resblocks.16.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
400
+ "transformer.visual.transformer.resblocks.16.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
401
+ "transformer.visual.transformer.resblocks.17.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
402
+ "transformer.visual.transformer.resblocks.17.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
403
+ "transformer.visual.transformer.resblocks.17.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
404
+ "transformer.visual.transformer.resblocks.17.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
405
+ "transformer.visual.transformer.resblocks.17.ln_1.bias": "pytorch_model-00002-of-00002.bin",
406
+ "transformer.visual.transformer.resblocks.17.ln_1.weight": "pytorch_model-00002-of-00002.bin",
407
+ "transformer.visual.transformer.resblocks.17.ln_2.bias": "pytorch_model-00002-of-00002.bin",
408
+ "transformer.visual.transformer.resblocks.17.ln_2.weight": "pytorch_model-00002-of-00002.bin",
409
+ "transformer.visual.transformer.resblocks.17.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
410
+ "transformer.visual.transformer.resblocks.17.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
411
+ "transformer.visual.transformer.resblocks.17.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
412
+ "transformer.visual.transformer.resblocks.17.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
413
+ "transformer.visual.transformer.resblocks.18.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
414
+ "transformer.visual.transformer.resblocks.18.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
415
+ "transformer.visual.transformer.resblocks.18.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
416
+ "transformer.visual.transformer.resblocks.18.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
417
+ "transformer.visual.transformer.resblocks.18.ln_1.bias": "pytorch_model-00002-of-00002.bin",
418
+ "transformer.visual.transformer.resblocks.18.ln_1.weight": "pytorch_model-00002-of-00002.bin",
419
+ "transformer.visual.transformer.resblocks.18.ln_2.bias": "pytorch_model-00002-of-00002.bin",
420
+ "transformer.visual.transformer.resblocks.18.ln_2.weight": "pytorch_model-00002-of-00002.bin",
421
+ "transformer.visual.transformer.resblocks.18.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
422
+ "transformer.visual.transformer.resblocks.18.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
423
+ "transformer.visual.transformer.resblocks.18.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
424
+ "transformer.visual.transformer.resblocks.18.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
425
+ "transformer.visual.transformer.resblocks.19.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
426
+ "transformer.visual.transformer.resblocks.19.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
427
+ "transformer.visual.transformer.resblocks.19.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
428
+ "transformer.visual.transformer.resblocks.19.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
429
+ "transformer.visual.transformer.resblocks.19.ln_1.bias": "pytorch_model-00002-of-00002.bin",
430
+ "transformer.visual.transformer.resblocks.19.ln_1.weight": "pytorch_model-00002-of-00002.bin",
431
+ "transformer.visual.transformer.resblocks.19.ln_2.bias": "pytorch_model-00002-of-00002.bin",
432
+ "transformer.visual.transformer.resblocks.19.ln_2.weight": "pytorch_model-00002-of-00002.bin",
433
+ "transformer.visual.transformer.resblocks.19.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
434
+ "transformer.visual.transformer.resblocks.19.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
435
+ "transformer.visual.transformer.resblocks.19.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
436
+ "transformer.visual.transformer.resblocks.19.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
437
+ "transformer.visual.transformer.resblocks.2.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
438
+ "transformer.visual.transformer.resblocks.2.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
439
+ "transformer.visual.transformer.resblocks.2.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
440
+ "transformer.visual.transformer.resblocks.2.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
441
+ "transformer.visual.transformer.resblocks.2.ln_1.bias": "pytorch_model-00002-of-00002.bin",
442
+ "transformer.visual.transformer.resblocks.2.ln_1.weight": "pytorch_model-00002-of-00002.bin",
443
+ "transformer.visual.transformer.resblocks.2.ln_2.bias": "pytorch_model-00002-of-00002.bin",
444
+ "transformer.visual.transformer.resblocks.2.ln_2.weight": "pytorch_model-00002-of-00002.bin",
445
+ "transformer.visual.transformer.resblocks.2.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
446
+ "transformer.visual.transformer.resblocks.2.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
447
+ "transformer.visual.transformer.resblocks.2.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
448
+ "transformer.visual.transformer.resblocks.2.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
449
+ "transformer.visual.transformer.resblocks.20.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
450
+ "transformer.visual.transformer.resblocks.20.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
451
+ "transformer.visual.transformer.resblocks.20.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
452
+ "transformer.visual.transformer.resblocks.20.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
453
+ "transformer.visual.transformer.resblocks.20.ln_1.bias": "pytorch_model-00002-of-00002.bin",
454
+ "transformer.visual.transformer.resblocks.20.ln_1.weight": "pytorch_model-00002-of-00002.bin",
455
+ "transformer.visual.transformer.resblocks.20.ln_2.bias": "pytorch_model-00002-of-00002.bin",
456
+ "transformer.visual.transformer.resblocks.20.ln_2.weight": "pytorch_model-00002-of-00002.bin",
457
+ "transformer.visual.transformer.resblocks.20.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
458
+ "transformer.visual.transformer.resblocks.20.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
459
+ "transformer.visual.transformer.resblocks.20.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
460
+ "transformer.visual.transformer.resblocks.20.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
461
+ "transformer.visual.transformer.resblocks.21.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
462
+ "transformer.visual.transformer.resblocks.21.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
463
+ "transformer.visual.transformer.resblocks.21.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
464
+ "transformer.visual.transformer.resblocks.21.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
465
+ "transformer.visual.transformer.resblocks.21.ln_1.bias": "pytorch_model-00002-of-00002.bin",
466
+ "transformer.visual.transformer.resblocks.21.ln_1.weight": "pytorch_model-00002-of-00002.bin",
467
+ "transformer.visual.transformer.resblocks.21.ln_2.bias": "pytorch_model-00002-of-00002.bin",
468
+ "transformer.visual.transformer.resblocks.21.ln_2.weight": "pytorch_model-00002-of-00002.bin",
469
+ "transformer.visual.transformer.resblocks.21.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
470
+ "transformer.visual.transformer.resblocks.21.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
471
+ "transformer.visual.transformer.resblocks.21.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
472
+ "transformer.visual.transformer.resblocks.21.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
473
+ "transformer.visual.transformer.resblocks.22.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
474
+ "transformer.visual.transformer.resblocks.22.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
475
+ "transformer.visual.transformer.resblocks.22.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
476
+ "transformer.visual.transformer.resblocks.22.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
477
+ "transformer.visual.transformer.resblocks.22.ln_1.bias": "pytorch_model-00002-of-00002.bin",
478
+ "transformer.visual.transformer.resblocks.22.ln_1.weight": "pytorch_model-00002-of-00002.bin",
479
+ "transformer.visual.transformer.resblocks.22.ln_2.bias": "pytorch_model-00002-of-00002.bin",
480
+ "transformer.visual.transformer.resblocks.22.ln_2.weight": "pytorch_model-00002-of-00002.bin",
481
+ "transformer.visual.transformer.resblocks.22.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
482
+ "transformer.visual.transformer.resblocks.22.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
483
+ "transformer.visual.transformer.resblocks.22.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
484
+ "transformer.visual.transformer.resblocks.22.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
485
+ "transformer.visual.transformer.resblocks.23.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
486
+ "transformer.visual.transformer.resblocks.23.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
487
+ "transformer.visual.transformer.resblocks.23.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
488
+ "transformer.visual.transformer.resblocks.23.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
489
+ "transformer.visual.transformer.resblocks.23.ln_1.bias": "pytorch_model-00002-of-00002.bin",
490
+ "transformer.visual.transformer.resblocks.23.ln_1.weight": "pytorch_model-00002-of-00002.bin",
491
+ "transformer.visual.transformer.resblocks.23.ln_2.bias": "pytorch_model-00002-of-00002.bin",
492
+ "transformer.visual.transformer.resblocks.23.ln_2.weight": "pytorch_model-00002-of-00002.bin",
493
+ "transformer.visual.transformer.resblocks.23.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
494
+ "transformer.visual.transformer.resblocks.23.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
495
+ "transformer.visual.transformer.resblocks.23.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
496
+ "transformer.visual.transformer.resblocks.23.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
497
+ "transformer.visual.transformer.resblocks.24.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
498
+ "transformer.visual.transformer.resblocks.24.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
499
+ "transformer.visual.transformer.resblocks.24.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
500
+ "transformer.visual.transformer.resblocks.24.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
501
+ "transformer.visual.transformer.resblocks.24.ln_1.bias": "pytorch_model-00002-of-00002.bin",
502
+ "transformer.visual.transformer.resblocks.24.ln_1.weight": "pytorch_model-00002-of-00002.bin",
503
+ "transformer.visual.transformer.resblocks.24.ln_2.bias": "pytorch_model-00002-of-00002.bin",
504
+ "transformer.visual.transformer.resblocks.24.ln_2.weight": "pytorch_model-00002-of-00002.bin",
505
+ "transformer.visual.transformer.resblocks.24.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
506
+ "transformer.visual.transformer.resblocks.24.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
507
+ "transformer.visual.transformer.resblocks.24.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
508
+ "transformer.visual.transformer.resblocks.24.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
509
+ "transformer.visual.transformer.resblocks.25.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
510
+ "transformer.visual.transformer.resblocks.25.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
511
+ "transformer.visual.transformer.resblocks.25.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
512
+ "transformer.visual.transformer.resblocks.25.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
513
+ "transformer.visual.transformer.resblocks.25.ln_1.bias": "pytorch_model-00002-of-00002.bin",
514
+ "transformer.visual.transformer.resblocks.25.ln_1.weight": "pytorch_model-00002-of-00002.bin",
515
+ "transformer.visual.transformer.resblocks.25.ln_2.bias": "pytorch_model-00002-of-00002.bin",
516
+ "transformer.visual.transformer.resblocks.25.ln_2.weight": "pytorch_model-00002-of-00002.bin",
517
+ "transformer.visual.transformer.resblocks.25.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
518
+ "transformer.visual.transformer.resblocks.25.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
519
+ "transformer.visual.transformer.resblocks.25.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
520
+ "transformer.visual.transformer.resblocks.25.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
521
+ "transformer.visual.transformer.resblocks.26.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
522
+ "transformer.visual.transformer.resblocks.26.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
523
+ "transformer.visual.transformer.resblocks.26.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
524
+ "transformer.visual.transformer.resblocks.26.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
525
+ "transformer.visual.transformer.resblocks.26.ln_1.bias": "pytorch_model-00002-of-00002.bin",
526
+ "transformer.visual.transformer.resblocks.26.ln_1.weight": "pytorch_model-00002-of-00002.bin",
527
+ "transformer.visual.transformer.resblocks.26.ln_2.bias": "pytorch_model-00002-of-00002.bin",
528
+ "transformer.visual.transformer.resblocks.26.ln_2.weight": "pytorch_model-00002-of-00002.bin",
529
+ "transformer.visual.transformer.resblocks.26.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
530
+ "transformer.visual.transformer.resblocks.26.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
531
+ "transformer.visual.transformer.resblocks.26.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
532
+ "transformer.visual.transformer.resblocks.26.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
533
+ "transformer.visual.transformer.resblocks.27.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
534
+ "transformer.visual.transformer.resblocks.27.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
535
+ "transformer.visual.transformer.resblocks.27.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
536
+ "transformer.visual.transformer.resblocks.27.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
537
+ "transformer.visual.transformer.resblocks.27.ln_1.bias": "pytorch_model-00002-of-00002.bin",
538
+ "transformer.visual.transformer.resblocks.27.ln_1.weight": "pytorch_model-00002-of-00002.bin",
539
+ "transformer.visual.transformer.resblocks.27.ln_2.bias": "pytorch_model-00002-of-00002.bin",
540
+ "transformer.visual.transformer.resblocks.27.ln_2.weight": "pytorch_model-00002-of-00002.bin",
541
+ "transformer.visual.transformer.resblocks.27.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
542
+ "transformer.visual.transformer.resblocks.27.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
543
+ "transformer.visual.transformer.resblocks.27.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
544
+ "transformer.visual.transformer.resblocks.27.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
545
+ "transformer.visual.transformer.resblocks.28.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
546
+ "transformer.visual.transformer.resblocks.28.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
547
+ "transformer.visual.transformer.resblocks.28.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
548
+ "transformer.visual.transformer.resblocks.28.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
549
+ "transformer.visual.transformer.resblocks.28.ln_1.bias": "pytorch_model-00002-of-00002.bin",
550
+ "transformer.visual.transformer.resblocks.28.ln_1.weight": "pytorch_model-00002-of-00002.bin",
551
+ "transformer.visual.transformer.resblocks.28.ln_2.bias": "pytorch_model-00002-of-00002.bin",
552
+ "transformer.visual.transformer.resblocks.28.ln_2.weight": "pytorch_model-00002-of-00002.bin",
553
+ "transformer.visual.transformer.resblocks.28.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
554
+ "transformer.visual.transformer.resblocks.28.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
555
+ "transformer.visual.transformer.resblocks.28.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
556
+ "transformer.visual.transformer.resblocks.28.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
557
+ "transformer.visual.transformer.resblocks.29.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
558
+ "transformer.visual.transformer.resblocks.29.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
559
+ "transformer.visual.transformer.resblocks.29.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
560
+ "transformer.visual.transformer.resblocks.29.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
561
+ "transformer.visual.transformer.resblocks.29.ln_1.bias": "pytorch_model-00002-of-00002.bin",
562
+ "transformer.visual.transformer.resblocks.29.ln_1.weight": "pytorch_model-00002-of-00002.bin",
563
+ "transformer.visual.transformer.resblocks.29.ln_2.bias": "pytorch_model-00002-of-00002.bin",
564
+ "transformer.visual.transformer.resblocks.29.ln_2.weight": "pytorch_model-00002-of-00002.bin",
565
+ "transformer.visual.transformer.resblocks.29.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
566
+ "transformer.visual.transformer.resblocks.29.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
567
+ "transformer.visual.transformer.resblocks.29.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
568
+ "transformer.visual.transformer.resblocks.29.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
569
+ "transformer.visual.transformer.resblocks.3.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
570
+ "transformer.visual.transformer.resblocks.3.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
571
+ "transformer.visual.transformer.resblocks.3.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
572
+ "transformer.visual.transformer.resblocks.3.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
573
+ "transformer.visual.transformer.resblocks.3.ln_1.bias": "pytorch_model-00002-of-00002.bin",
574
+ "transformer.visual.transformer.resblocks.3.ln_1.weight": "pytorch_model-00002-of-00002.bin",
575
+ "transformer.visual.transformer.resblocks.3.ln_2.bias": "pytorch_model-00002-of-00002.bin",
576
+ "transformer.visual.transformer.resblocks.3.ln_2.weight": "pytorch_model-00002-of-00002.bin",
577
+ "transformer.visual.transformer.resblocks.3.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
578
+ "transformer.visual.transformer.resblocks.3.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
579
+ "transformer.visual.transformer.resblocks.3.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
580
+ "transformer.visual.transformer.resblocks.3.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
581
+ "transformer.visual.transformer.resblocks.30.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
582
+ "transformer.visual.transformer.resblocks.30.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
583
+ "transformer.visual.transformer.resblocks.30.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
584
+ "transformer.visual.transformer.resblocks.30.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
585
+ "transformer.visual.transformer.resblocks.30.ln_1.bias": "pytorch_model-00002-of-00002.bin",
586
+ "transformer.visual.transformer.resblocks.30.ln_1.weight": "pytorch_model-00002-of-00002.bin",
587
+ "transformer.visual.transformer.resblocks.30.ln_2.bias": "pytorch_model-00002-of-00002.bin",
588
+ "transformer.visual.transformer.resblocks.30.ln_2.weight": "pytorch_model-00002-of-00002.bin",
589
+ "transformer.visual.transformer.resblocks.30.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
590
+ "transformer.visual.transformer.resblocks.30.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
591
+ "transformer.visual.transformer.resblocks.30.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
592
+ "transformer.visual.transformer.resblocks.30.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
593
+ "transformer.visual.transformer.resblocks.31.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
594
+ "transformer.visual.transformer.resblocks.31.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
595
+ "transformer.visual.transformer.resblocks.31.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
596
+ "transformer.visual.transformer.resblocks.31.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
597
+ "transformer.visual.transformer.resblocks.31.ln_1.bias": "pytorch_model-00002-of-00002.bin",
598
+ "transformer.visual.transformer.resblocks.31.ln_1.weight": "pytorch_model-00002-of-00002.bin",
599
+ "transformer.visual.transformer.resblocks.31.ln_2.bias": "pytorch_model-00002-of-00002.bin",
600
+ "transformer.visual.transformer.resblocks.31.ln_2.weight": "pytorch_model-00002-of-00002.bin",
601
+ "transformer.visual.transformer.resblocks.31.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
602
+ "transformer.visual.transformer.resblocks.31.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
603
+ "transformer.visual.transformer.resblocks.31.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
604
+ "transformer.visual.transformer.resblocks.31.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
605
+ "transformer.visual.transformer.resblocks.32.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
606
+ "transformer.visual.transformer.resblocks.32.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
607
+ "transformer.visual.transformer.resblocks.32.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
608
+ "transformer.visual.transformer.resblocks.32.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
609
+ "transformer.visual.transformer.resblocks.32.ln_1.bias": "pytorch_model-00002-of-00002.bin",
610
+ "transformer.visual.transformer.resblocks.32.ln_1.weight": "pytorch_model-00002-of-00002.bin",
611
+ "transformer.visual.transformer.resblocks.32.ln_2.bias": "pytorch_model-00002-of-00002.bin",
612
+ "transformer.visual.transformer.resblocks.32.ln_2.weight": "pytorch_model-00002-of-00002.bin",
613
+ "transformer.visual.transformer.resblocks.32.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
614
+ "transformer.visual.transformer.resblocks.32.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
615
+ "transformer.visual.transformer.resblocks.32.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
616
+ "transformer.visual.transformer.resblocks.32.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
617
+ "transformer.visual.transformer.resblocks.33.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
618
+ "transformer.visual.transformer.resblocks.33.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
619
+ "transformer.visual.transformer.resblocks.33.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
620
+ "transformer.visual.transformer.resblocks.33.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
621
+ "transformer.visual.transformer.resblocks.33.ln_1.bias": "pytorch_model-00002-of-00002.bin",
622
+ "transformer.visual.transformer.resblocks.33.ln_1.weight": "pytorch_model-00002-of-00002.bin",
623
+ "transformer.visual.transformer.resblocks.33.ln_2.bias": "pytorch_model-00002-of-00002.bin",
624
+ "transformer.visual.transformer.resblocks.33.ln_2.weight": "pytorch_model-00002-of-00002.bin",
625
+ "transformer.visual.transformer.resblocks.33.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
626
+ "transformer.visual.transformer.resblocks.33.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
627
+ "transformer.visual.transformer.resblocks.33.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
628
+ "transformer.visual.transformer.resblocks.33.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
629
+ "transformer.visual.transformer.resblocks.34.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
630
+ "transformer.visual.transformer.resblocks.34.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
631
+ "transformer.visual.transformer.resblocks.34.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
632
+ "transformer.visual.transformer.resblocks.34.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
633
+ "transformer.visual.transformer.resblocks.34.ln_1.bias": "pytorch_model-00002-of-00002.bin",
634
+ "transformer.visual.transformer.resblocks.34.ln_1.weight": "pytorch_model-00002-of-00002.bin",
635
+ "transformer.visual.transformer.resblocks.34.ln_2.bias": "pytorch_model-00002-of-00002.bin",
636
+ "transformer.visual.transformer.resblocks.34.ln_2.weight": "pytorch_model-00002-of-00002.bin",
637
+ "transformer.visual.transformer.resblocks.34.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
638
+ "transformer.visual.transformer.resblocks.34.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
639
+ "transformer.visual.transformer.resblocks.34.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
640
+ "transformer.visual.transformer.resblocks.34.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
641
+ "transformer.visual.transformer.resblocks.35.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
642
+ "transformer.visual.transformer.resblocks.35.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
643
+ "transformer.visual.transformer.resblocks.35.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
644
+ "transformer.visual.transformer.resblocks.35.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
645
+ "transformer.visual.transformer.resblocks.35.ln_1.bias": "pytorch_model-00002-of-00002.bin",
646
+ "transformer.visual.transformer.resblocks.35.ln_1.weight": "pytorch_model-00002-of-00002.bin",
647
+ "transformer.visual.transformer.resblocks.35.ln_2.bias": "pytorch_model-00002-of-00002.bin",
648
+ "transformer.visual.transformer.resblocks.35.ln_2.weight": "pytorch_model-00002-of-00002.bin",
649
+ "transformer.visual.transformer.resblocks.35.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
650
+ "transformer.visual.transformer.resblocks.35.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
651
+ "transformer.visual.transformer.resblocks.35.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
652
+ "transformer.visual.transformer.resblocks.35.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
653
+ "transformer.visual.transformer.resblocks.36.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
654
+ "transformer.visual.transformer.resblocks.36.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
655
+ "transformer.visual.transformer.resblocks.36.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
656
+ "transformer.visual.transformer.resblocks.36.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
657
+ "transformer.visual.transformer.resblocks.36.ln_1.bias": "pytorch_model-00002-of-00002.bin",
658
+ "transformer.visual.transformer.resblocks.36.ln_1.weight": "pytorch_model-00002-of-00002.bin",
659
+ "transformer.visual.transformer.resblocks.36.ln_2.bias": "pytorch_model-00002-of-00002.bin",
660
+ "transformer.visual.transformer.resblocks.36.ln_2.weight": "pytorch_model-00002-of-00002.bin",
661
+ "transformer.visual.transformer.resblocks.36.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
662
+ "transformer.visual.transformer.resblocks.36.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
663
+ "transformer.visual.transformer.resblocks.36.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
664
+ "transformer.visual.transformer.resblocks.36.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
665
+ "transformer.visual.transformer.resblocks.37.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
666
+ "transformer.visual.transformer.resblocks.37.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
667
+ "transformer.visual.transformer.resblocks.37.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
668
+ "transformer.visual.transformer.resblocks.37.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
669
+ "transformer.visual.transformer.resblocks.37.ln_1.bias": "pytorch_model-00002-of-00002.bin",
670
+ "transformer.visual.transformer.resblocks.37.ln_1.weight": "pytorch_model-00002-of-00002.bin",
671
+ "transformer.visual.transformer.resblocks.37.ln_2.bias": "pytorch_model-00002-of-00002.bin",
672
+ "transformer.visual.transformer.resblocks.37.ln_2.weight": "pytorch_model-00002-of-00002.bin",
673
+ "transformer.visual.transformer.resblocks.37.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
674
+ "transformer.visual.transformer.resblocks.37.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
675
+ "transformer.visual.transformer.resblocks.37.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
676
+ "transformer.visual.transformer.resblocks.37.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
677
+ "transformer.visual.transformer.resblocks.38.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
678
+ "transformer.visual.transformer.resblocks.38.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
679
+ "transformer.visual.transformer.resblocks.38.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
680
+ "transformer.visual.transformer.resblocks.38.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
681
+ "transformer.visual.transformer.resblocks.38.ln_1.bias": "pytorch_model-00002-of-00002.bin",
682
+ "transformer.visual.transformer.resblocks.38.ln_1.weight": "pytorch_model-00002-of-00002.bin",
683
+ "transformer.visual.transformer.resblocks.38.ln_2.bias": "pytorch_model-00002-of-00002.bin",
684
+ "transformer.visual.transformer.resblocks.38.ln_2.weight": "pytorch_model-00002-of-00002.bin",
685
+ "transformer.visual.transformer.resblocks.38.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
686
+ "transformer.visual.transformer.resblocks.38.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
687
+ "transformer.visual.transformer.resblocks.38.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
688
+ "transformer.visual.transformer.resblocks.38.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
689
+ "transformer.visual.transformer.resblocks.39.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
690
+ "transformer.visual.transformer.resblocks.39.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
691
+ "transformer.visual.transformer.resblocks.39.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
692
+ "transformer.visual.transformer.resblocks.39.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
693
+ "transformer.visual.transformer.resblocks.39.ln_1.bias": "pytorch_model-00002-of-00002.bin",
694
+ "transformer.visual.transformer.resblocks.39.ln_1.weight": "pytorch_model-00002-of-00002.bin",
695
+ "transformer.visual.transformer.resblocks.39.ln_2.bias": "pytorch_model-00002-of-00002.bin",
696
+ "transformer.visual.transformer.resblocks.39.ln_2.weight": "pytorch_model-00002-of-00002.bin",
697
+ "transformer.visual.transformer.resblocks.39.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
698
+ "transformer.visual.transformer.resblocks.39.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
699
+ "transformer.visual.transformer.resblocks.39.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
700
+ "transformer.visual.transformer.resblocks.39.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
701
+ "transformer.visual.transformer.resblocks.4.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
702
+ "transformer.visual.transformer.resblocks.4.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
703
+ "transformer.visual.transformer.resblocks.4.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
704
+ "transformer.visual.transformer.resblocks.4.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
705
+ "transformer.visual.transformer.resblocks.4.ln_1.bias": "pytorch_model-00002-of-00002.bin",
706
+ "transformer.visual.transformer.resblocks.4.ln_1.weight": "pytorch_model-00002-of-00002.bin",
707
+ "transformer.visual.transformer.resblocks.4.ln_2.bias": "pytorch_model-00002-of-00002.bin",
708
+ "transformer.visual.transformer.resblocks.4.ln_2.weight": "pytorch_model-00002-of-00002.bin",
709
+ "transformer.visual.transformer.resblocks.4.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
710
+ "transformer.visual.transformer.resblocks.4.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
711
+ "transformer.visual.transformer.resblocks.4.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
712
+ "transformer.visual.transformer.resblocks.4.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
713
+ "transformer.visual.transformer.resblocks.40.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
714
+ "transformer.visual.transformer.resblocks.40.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
715
+ "transformer.visual.transformer.resblocks.40.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
716
+ "transformer.visual.transformer.resblocks.40.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
717
+ "transformer.visual.transformer.resblocks.40.ln_1.bias": "pytorch_model-00002-of-00002.bin",
718
+ "transformer.visual.transformer.resblocks.40.ln_1.weight": "pytorch_model-00002-of-00002.bin",
719
+ "transformer.visual.transformer.resblocks.40.ln_2.bias": "pytorch_model-00002-of-00002.bin",
720
+ "transformer.visual.transformer.resblocks.40.ln_2.weight": "pytorch_model-00002-of-00002.bin",
721
+ "transformer.visual.transformer.resblocks.40.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
722
+ "transformer.visual.transformer.resblocks.40.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
723
+ "transformer.visual.transformer.resblocks.40.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
724
+ "transformer.visual.transformer.resblocks.40.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
725
+ "transformer.visual.transformer.resblocks.41.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
726
+ "transformer.visual.transformer.resblocks.41.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
727
+ "transformer.visual.transformer.resblocks.41.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
728
+ "transformer.visual.transformer.resblocks.41.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
729
+ "transformer.visual.transformer.resblocks.41.ln_1.bias": "pytorch_model-00002-of-00002.bin",
730
+ "transformer.visual.transformer.resblocks.41.ln_1.weight": "pytorch_model-00002-of-00002.bin",
731
+ "transformer.visual.transformer.resblocks.41.ln_2.bias": "pytorch_model-00002-of-00002.bin",
732
+ "transformer.visual.transformer.resblocks.41.ln_2.weight": "pytorch_model-00002-of-00002.bin",
733
+ "transformer.visual.transformer.resblocks.41.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
734
+ "transformer.visual.transformer.resblocks.41.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
735
+ "transformer.visual.transformer.resblocks.41.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
736
+ "transformer.visual.transformer.resblocks.41.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
737
+ "transformer.visual.transformer.resblocks.42.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
738
+ "transformer.visual.transformer.resblocks.42.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
739
+ "transformer.visual.transformer.resblocks.42.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
740
+ "transformer.visual.transformer.resblocks.42.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
741
+ "transformer.visual.transformer.resblocks.42.ln_1.bias": "pytorch_model-00002-of-00002.bin",
742
+ "transformer.visual.transformer.resblocks.42.ln_1.weight": "pytorch_model-00002-of-00002.bin",
743
+ "transformer.visual.transformer.resblocks.42.ln_2.bias": "pytorch_model-00002-of-00002.bin",
744
+ "transformer.visual.transformer.resblocks.42.ln_2.weight": "pytorch_model-00002-of-00002.bin",
745
+ "transformer.visual.transformer.resblocks.42.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
746
+ "transformer.visual.transformer.resblocks.42.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
747
+ "transformer.visual.transformer.resblocks.42.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
748
+ "transformer.visual.transformer.resblocks.42.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
749
+ "transformer.visual.transformer.resblocks.43.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
750
+ "transformer.visual.transformer.resblocks.43.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
751
+ "transformer.visual.transformer.resblocks.43.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
752
+ "transformer.visual.transformer.resblocks.43.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
753
+ "transformer.visual.transformer.resblocks.43.ln_1.bias": "pytorch_model-00002-of-00002.bin",
754
+ "transformer.visual.transformer.resblocks.43.ln_1.weight": "pytorch_model-00002-of-00002.bin",
755
+ "transformer.visual.transformer.resblocks.43.ln_2.bias": "pytorch_model-00002-of-00002.bin",
756
+ "transformer.visual.transformer.resblocks.43.ln_2.weight": "pytorch_model-00002-of-00002.bin",
757
+ "transformer.visual.transformer.resblocks.43.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
758
+ "transformer.visual.transformer.resblocks.43.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
759
+ "transformer.visual.transformer.resblocks.43.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
760
+ "transformer.visual.transformer.resblocks.43.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
761
+ "transformer.visual.transformer.resblocks.44.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
762
+ "transformer.visual.transformer.resblocks.44.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
763
+ "transformer.visual.transformer.resblocks.44.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
764
+ "transformer.visual.transformer.resblocks.44.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
765
+ "transformer.visual.transformer.resblocks.44.ln_1.bias": "pytorch_model-00002-of-00002.bin",
766
+ "transformer.visual.transformer.resblocks.44.ln_1.weight": "pytorch_model-00002-of-00002.bin",
767
+ "transformer.visual.transformer.resblocks.44.ln_2.bias": "pytorch_model-00002-of-00002.bin",
768
+ "transformer.visual.transformer.resblocks.44.ln_2.weight": "pytorch_model-00002-of-00002.bin",
769
+ "transformer.visual.transformer.resblocks.44.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
770
+ "transformer.visual.transformer.resblocks.44.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
771
+ "transformer.visual.transformer.resblocks.44.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
772
+ "transformer.visual.transformer.resblocks.44.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
773
+ "transformer.visual.transformer.resblocks.45.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
774
+ "transformer.visual.transformer.resblocks.45.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
775
+ "transformer.visual.transformer.resblocks.45.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
776
+ "transformer.visual.transformer.resblocks.45.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
777
+ "transformer.visual.transformer.resblocks.45.ln_1.bias": "pytorch_model-00002-of-00002.bin",
778
+ "transformer.visual.transformer.resblocks.45.ln_1.weight": "pytorch_model-00002-of-00002.bin",
779
+ "transformer.visual.transformer.resblocks.45.ln_2.bias": "pytorch_model-00002-of-00002.bin",
780
+ "transformer.visual.transformer.resblocks.45.ln_2.weight": "pytorch_model-00002-of-00002.bin",
781
+ "transformer.visual.transformer.resblocks.45.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
782
+ "transformer.visual.transformer.resblocks.45.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
783
+ "transformer.visual.transformer.resblocks.45.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
784
+ "transformer.visual.transformer.resblocks.45.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
785
+ "transformer.visual.transformer.resblocks.46.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
786
+ "transformer.visual.transformer.resblocks.46.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
787
+ "transformer.visual.transformer.resblocks.46.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
788
+ "transformer.visual.transformer.resblocks.46.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
789
+ "transformer.visual.transformer.resblocks.46.ln_1.bias": "pytorch_model-00002-of-00002.bin",
790
+ "transformer.visual.transformer.resblocks.46.ln_1.weight": "pytorch_model-00002-of-00002.bin",
791
+ "transformer.visual.transformer.resblocks.46.ln_2.bias": "pytorch_model-00002-of-00002.bin",
792
+ "transformer.visual.transformer.resblocks.46.ln_2.weight": "pytorch_model-00002-of-00002.bin",
793
+ "transformer.visual.transformer.resblocks.46.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
794
+ "transformer.visual.transformer.resblocks.46.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
795
+ "transformer.visual.transformer.resblocks.46.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
796
+ "transformer.visual.transformer.resblocks.46.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
797
+ "transformer.visual.transformer.resblocks.47.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
798
+ "transformer.visual.transformer.resblocks.47.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
799
+ "transformer.visual.transformer.resblocks.47.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
800
+ "transformer.visual.transformer.resblocks.47.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
801
+ "transformer.visual.transformer.resblocks.47.ln_1.bias": "pytorch_model-00002-of-00002.bin",
802
+ "transformer.visual.transformer.resblocks.47.ln_1.weight": "pytorch_model-00002-of-00002.bin",
803
+ "transformer.visual.transformer.resblocks.47.ln_2.bias": "pytorch_model-00002-of-00002.bin",
804
+ "transformer.visual.transformer.resblocks.47.ln_2.weight": "pytorch_model-00002-of-00002.bin",
805
+ "transformer.visual.transformer.resblocks.47.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
806
+ "transformer.visual.transformer.resblocks.47.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
807
+ "transformer.visual.transformer.resblocks.47.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
808
+ "transformer.visual.transformer.resblocks.47.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
809
+ "transformer.visual.transformer.resblocks.5.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
810
+ "transformer.visual.transformer.resblocks.5.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
811
+ "transformer.visual.transformer.resblocks.5.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
812
+ "transformer.visual.transformer.resblocks.5.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
813
+ "transformer.visual.transformer.resblocks.5.ln_1.bias": "pytorch_model-00002-of-00002.bin",
814
+ "transformer.visual.transformer.resblocks.5.ln_1.weight": "pytorch_model-00002-of-00002.bin",
815
+ "transformer.visual.transformer.resblocks.5.ln_2.bias": "pytorch_model-00002-of-00002.bin",
816
+ "transformer.visual.transformer.resblocks.5.ln_2.weight": "pytorch_model-00002-of-00002.bin",
817
+ "transformer.visual.transformer.resblocks.5.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
818
+ "transformer.visual.transformer.resblocks.5.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
819
+ "transformer.visual.transformer.resblocks.5.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
820
+ "transformer.visual.transformer.resblocks.5.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
821
+ "transformer.visual.transformer.resblocks.6.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
822
+ "transformer.visual.transformer.resblocks.6.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
823
+ "transformer.visual.transformer.resblocks.6.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
824
+ "transformer.visual.transformer.resblocks.6.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
825
+ "transformer.visual.transformer.resblocks.6.ln_1.bias": "pytorch_model-00002-of-00002.bin",
826
+ "transformer.visual.transformer.resblocks.6.ln_1.weight": "pytorch_model-00002-of-00002.bin",
827
+ "transformer.visual.transformer.resblocks.6.ln_2.bias": "pytorch_model-00002-of-00002.bin",
828
+ "transformer.visual.transformer.resblocks.6.ln_2.weight": "pytorch_model-00002-of-00002.bin",
829
+ "transformer.visual.transformer.resblocks.6.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
830
+ "transformer.visual.transformer.resblocks.6.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
831
+ "transformer.visual.transformer.resblocks.6.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
832
+ "transformer.visual.transformer.resblocks.6.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
833
+ "transformer.visual.transformer.resblocks.7.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
834
+ "transformer.visual.transformer.resblocks.7.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
835
+ "transformer.visual.transformer.resblocks.7.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
836
+ "transformer.visual.transformer.resblocks.7.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
837
+ "transformer.visual.transformer.resblocks.7.ln_1.bias": "pytorch_model-00002-of-00002.bin",
838
+ "transformer.visual.transformer.resblocks.7.ln_1.weight": "pytorch_model-00002-of-00002.bin",
839
+ "transformer.visual.transformer.resblocks.7.ln_2.bias": "pytorch_model-00002-of-00002.bin",
840
+ "transformer.visual.transformer.resblocks.7.ln_2.weight": "pytorch_model-00002-of-00002.bin",
841
+ "transformer.visual.transformer.resblocks.7.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
842
+ "transformer.visual.transformer.resblocks.7.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
843
+ "transformer.visual.transformer.resblocks.7.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
844
+ "transformer.visual.transformer.resblocks.7.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
845
+ "transformer.visual.transformer.resblocks.8.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
846
+ "transformer.visual.transformer.resblocks.8.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
847
+ "transformer.visual.transformer.resblocks.8.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
848
+ "transformer.visual.transformer.resblocks.8.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
849
+ "transformer.visual.transformer.resblocks.8.ln_1.bias": "pytorch_model-00002-of-00002.bin",
850
+ "transformer.visual.transformer.resblocks.8.ln_1.weight": "pytorch_model-00002-of-00002.bin",
851
+ "transformer.visual.transformer.resblocks.8.ln_2.bias": "pytorch_model-00002-of-00002.bin",
852
+ "transformer.visual.transformer.resblocks.8.ln_2.weight": "pytorch_model-00002-of-00002.bin",
853
+ "transformer.visual.transformer.resblocks.8.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
854
+ "transformer.visual.transformer.resblocks.8.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
855
+ "transformer.visual.transformer.resblocks.8.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
856
+ "transformer.visual.transformer.resblocks.8.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
857
+ "transformer.visual.transformer.resblocks.9.attn.in_proj.bias": "pytorch_model-00002-of-00002.bin",
858
+ "transformer.visual.transformer.resblocks.9.attn.in_proj.weight": "pytorch_model-00002-of-00002.bin",
859
+ "transformer.visual.transformer.resblocks.9.attn.out_proj.bias": "pytorch_model-00002-of-00002.bin",
860
+ "transformer.visual.transformer.resblocks.9.attn.out_proj.weight": "pytorch_model-00002-of-00002.bin",
861
+ "transformer.visual.transformer.resblocks.9.ln_1.bias": "pytorch_model-00002-of-00002.bin",
862
+ "transformer.visual.transformer.resblocks.9.ln_1.weight": "pytorch_model-00002-of-00002.bin",
863
+ "transformer.visual.transformer.resblocks.9.ln_2.bias": "pytorch_model-00002-of-00002.bin",
864
+ "transformer.visual.transformer.resblocks.9.ln_2.weight": "pytorch_model-00002-of-00002.bin",
865
+ "transformer.visual.transformer.resblocks.9.mlp.c_fc.bias": "pytorch_model-00002-of-00002.bin",
866
+ "transformer.visual.transformer.resblocks.9.mlp.c_fc.weight": "pytorch_model-00002-of-00002.bin",
867
+ "transformer.visual.transformer.resblocks.9.mlp.c_proj.bias": "pytorch_model-00002-of-00002.bin",
868
+ "transformer.visual.transformer.resblocks.9.mlp.c_proj.weight": "pytorch_model-00002-of-00002.bin",
869
+ "transformer.wte.weight": "pytorch_model-00001-of-00002.bin"
870
+ }
871
+ }
tokenization_qwen.py ADDED
@@ -0,0 +1,593 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba Cloud.
2
+ #
3
+ # This source code is licensed under the license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ """Tokenization classes for QWen."""
7
+
8
+ import base64
9
+ import logging
10
+ import os
11
+ import requests
12
+ import unicodedata
13
+ from typing import Collection, Dict, List, Set, Tuple, Union, Any, Callable, Optional
14
+ import pdb
15
+ import tiktoken
16
+ import numpy as np
17
+ from PIL import Image
18
+ from PIL import ImageFont
19
+ from PIL import ImageDraw
20
+ from transformers import PreTrainedTokenizer, AddedToken
21
+ from transformers.utils import try_to_load_from_cache
22
+
23
+ import matplotlib.colors as mcolors
24
+ from matplotlib.font_manager import FontProperties
25
+
26
logger = logging.getLogger(__name__)


VOCAB_FILES_NAMES = {"vocab_file": "qwen.tiktoken", "ttf": "SimSun.ttf"}

# Prefer a copy of the font already present in the local Hugging Face cache;
# when the lookup yields nothing, fall back to a path relative to the
# current working directory.
FONT_PATH = try_to_load_from_cache("Qwen/Qwen-VL-Chat", "SimSun.ttf")
if FONT_PATH is None:
    FONT_PATH = "SimSun.ttf"

# Pre-tokenisation regex handed to tiktoken.
PAT_STR = r"""(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\r\n\p{L}\p{N}]?\p{L}+|\p{N}| ?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+"""
ENDOFTEXT = "<|endoftext|>"
IMSTART = "<|im_start|>"
IMEND = "<|im_end|>"
# As the default behavior is changed to allow special tokens in regular
# texts, the surface forms of special tokens need to be as different as
# possible to minimize the impact.
EXTRAS = tuple(f"<|extra_{i}|>" for i in range(205))
SPECIAL_TOKENS = (ENDOFTEXT, IMSTART, IMEND) + EXTRAS
# Maximum number of tokens allowed between an image start tag and its end tag;
# shorter contents are padded up to this length by the tokenizer.
IMG_TOKEN_SPAN = 512
52
+
53
+
54
+ def _load_tiktoken_bpe(tiktoken_bpe_file: str) -> Dict[bytes, int]:
55
+ with open(tiktoken_bpe_file, "rb") as f:
56
+ contents = f.read()
57
+ return {
58
+ base64.b64decode(token): int(rank)
59
+ for token, rank in (line.split() for line in contents.splitlines() if line)
60
+ }
61
+
62
+ def _list_find(
63
+ input_list: List[Any],
64
+ candidates: Tuple[Any],
65
+ start: int = 0,
66
+ ):
67
+ for i in range(start, len(input_list)):
68
+ if input_list[i] in candidates:
69
+ return i
70
+ return -1
71
+
72
+ def _replace_closed_tag(
73
+ input_tokens: List[Any],
74
+ start_tags: Union[Any, Tuple[Any]],
75
+ end_tags: Union[Any, Tuple[Any]],
76
+ inclusive_replace_func: Callable,
77
+ exclusive_replace_func: Callable = lambda x: x,
78
+ ):
79
+ if isinstance(start_tags, (str, int)):
80
+ start_tags = (start_tags,)
81
+ if isinstance(end_tags, (str, int)):
82
+ end_tags = (end_tags,)
83
+ assert len(start_tags) == len(end_tags)
84
+
85
+ output_tokens = []
86
+ end = 0
87
+ while True:
88
+ start = _list_find(input_tokens, start_tags, end)
89
+ if start == -1:
90
+ break
91
+ output_tokens.extend(exclusive_replace_func(input_tokens[end : start]))
92
+ tag_idx = start_tags.index(input_tokens[start])
93
+ end = _list_find(input_tokens, (end_tags[tag_idx],), start)
94
+ if end == -1:
95
+ raise ValueError("Unclosed image token")
96
+ output_tokens.extend(inclusive_replace_func(input_tokens[start : end + 1]))
97
+ end += 1
98
+ output_tokens.extend(exclusive_replace_func(input_tokens[end : ]))
99
+ return output_tokens
100
+
101
+ class QWenTokenizer(PreTrainedTokenizer):
102
+ """QWen tokenizer."""
103
+
104
+ vocab_files_names = VOCAB_FILES_NAMES
105
+
106
def __init__(
    self,
    vocab_file,
    errors="replace",
    image_start_tag='<img>',
    image_end_tag='</img>',
    image_pad_tag='<imgpad>',
    ref_start_tag='<ref>',
    ref_end_tag='</ref>',
    box_start_tag='<box>',
    box_end_tag='</box>',
    quad_start_tag='<quad>',
    quad_end_tag='</quad>',
    **kwargs,
):
    """Build the tokenizer from a tiktoken vocabulary file.

    Args:
        vocab_file: Path of the tiktoken BPE vocabulary file.
        errors: Error handling scheme used when decoding bytes as UTF-8.
        image_start_tag, image_end_tag, image_pad_tag: Surface forms of the
            image start, end and padding special tokens.
        ref_start_tag, ref_end_tag: Surface forms of the ref special tokens.
        box_start_tag, box_end_tag: Surface forms of the box special tokens.
        quad_start_tag, quad_end_tag: Surface forms of the quad special
            tokens.
        **kwargs: Forwarded to the base tokenizer constructor.
    """
    self.image_start_tag = image_start_tag
    self.image_end_tag = image_end_tag
    self.image_pad_tag = image_pad_tag
    self.ref_start_tag = ref_start_tag
    self.ref_end_tag = ref_end_tag
    self.box_start_tag = box_start_tag
    self.box_end_tag = box_end_tag
    self.quad_start_tag = quad_start_tag
    self.quad_end_tag = quad_end_tag
    self.IMAGE_ST = (
        ref_start_tag, ref_end_tag,
        box_start_tag, box_end_tag,
        quad_start_tag, quad_end_tag,
        image_start_tag, image_end_tag,
        image_pad_tag
    )

    self.errors = errors  # how to handle errors in decoding

    self.mergeable_ranks = _load_tiktoken_bpe(vocab_file)  # type: dict[bytes, int]
    # Special tokens get consecutive ids directly after the BPE ranks, in the
    # order SPECIAL_TOKENS followed by IMAGE_ST.
    self.special_tokens = {
        token: index
        for index, token in enumerate(
            SPECIAL_TOKENS + self.IMAGE_ST, start=len(self.mergeable_ranks)
        )
    }
    self.img_start_id = self.special_tokens[self.image_start_tag]
    self.img_end_id = self.special_tokens[self.image_end_tag]
    self.img_pad_id = self.special_tokens[self.image_pad_tag]
    self.ref_start_id = self.special_tokens[self.ref_start_tag]
    self.ref_end_id = self.special_tokens[self.ref_end_tag]
    self.box_start_id = self.special_tokens[self.box_start_tag]
    self.box_end_id = self.special_tokens[self.box_end_tag]
    self.quad_start_id = self.special_tokens[self.quad_start_tag]
    self.quad_end_id = self.special_tokens[self.quad_end_tag]

    enc = tiktoken.Encoding(
        "Qwen",
        pat_str=PAT_STR,
        mergeable_ranks=self.mergeable_ranks,
        special_tokens=self.special_tokens,
    )
    assert (
        len(self.mergeable_ranks) + len(self.special_tokens) == enc.n_vocab
    ), f"{len(self.mergeable_ranks) + len(self.special_tokens)} != {enc.n_vocab} in encoding"

    # Reverse map id -> surface form: bytes for BPE tokens, str for specials.
    self.decoder = {
        v: k for k, v in self.mergeable_ranks.items()
    }  # type: dict[int, bytes|str]
    self.decoder.update({v: k for k, v in self.special_tokens.items()})

    self.tokenizer = enc  # type: tiktoken.Encoding

    self.eod_id = self.tokenizer.eot_token
    self.im_start_id = self.special_tokens[IMSTART]
    self.im_end_id = self.special_tokens[IMEND]

    # Initialise the base class last: its constructor may call back into
    # methods of this class (e.g. _add_tokens, __len__) that read
    # self.IMAGE_ST, self.special_tokens and self.tokenizer, so all of that
    # state has to exist before it runs.
    super().__init__(**kwargs)
178
+
179
+ def __getstate__(self):
180
+ # for pickle lovers
181
+ state = self.__dict__.copy()
182
+ del state['tokenizer']
183
+ return state
184
+
185
def __setstate__(self, state):
    """Restore pickled state and rebuild the tiktoken encoding.

    The encoding is not part of the pickled state, so it is constructed
    again from the restored ``mergeable_ranks`` and ``special_tokens``.
    """
    self.__dict__.update(state)
    encoding_kwargs = dict(
        pat_str=PAT_STR,
        mergeable_ranks=self.mergeable_ranks,
        special_tokens=self.special_tokens,
    )
    self.tokenizer = tiktoken.Encoding("Qwen", **encoding_kwargs)
195
+
196
+
197
+ def __len__(self) -> int:
198
+ return self.tokenizer.n_vocab
199
+
200
def get_vocab(self) -> Dict[bytes, int]:
    """Return the BPE rank table (token bytes -> id).

    The mapping is the instance's own ``mergeable_ranks`` dict, not a copy,
    and does not contain the special tokens.
    """
    bpe_ranks = self.mergeable_ranks
    return bpe_ranks
202
+
203
def convert_tokens_to_ids(
    self, tokens: Union[bytes, str, List[Union[bytes, str]]]
) -> List[int]:
    """Map one token, or a list of tokens, to ids.

    Special tokens are looked up first, then the BPE rank table. A single
    ``str``/``bytes`` argument yields a single id rather than a list.
    Tokens found in neither table map to ``None``.
    """

    def _lookup(token):
        special = self.special_tokens
        if token in special:
            return special[token]
        return self.mergeable_ranks.get(token)

    if isinstance(tokens, (str, bytes)):
        return _lookup(tokens)
    return [_lookup(token) for token in tokens]
218
+
219
def _add_tokens(self, new_tokens: Union[List[str], List[AddedToken]], special_tokens: bool = False) -> int:
    """Validate a request to add tokens; the vocabulary is never extended.

    Only special tokens whose surface form is already known (in
    ``SPECIAL_TOKENS`` or ``self.IMAGE_ST``) are accepted.

    Returns:
        Always ``0``.

    Raises:
        ValueError: If regular tokens are passed, or a special token has an
            unknown surface form.
    """
    if new_tokens and not special_tokens:
        raise ValueError('Adding regular tokens is not supported')
    known_forms = None
    for candidate in new_tokens:
        if known_forms is None:
            # Built on first use only, so an empty request never touches
            # self.IMAGE_ST.
            known_forms = SPECIAL_TOKENS + self.IMAGE_ST
        text = candidate.content if isinstance(candidate, AddedToken) else candidate
        if text not in known_forms:
            raise ValueError('Adding unknown special tokens is not supported')
    return 0
227
+
228
def save_vocabulary(self, save_directory: str, **kwargs) -> Tuple[str]:
    """
    Write the BPE rank table to ``qwen.tiktoken`` inside ``save_directory``.

    Each line holds the base64-encoded token, a space, and its rank.

    Returns:
        `Tuple(str)`: Paths to the files saved.
    """
    destination = os.path.join(save_directory, "qwen.tiktoken")
    with open(destination, "w", encoding="utf8") as handle:
        handle.writelines(
            f"{base64.b64encode(token).decode('utf8')} {rank}\n"
            for token, rank in self.mergeable_ranks.items()
        )
    return (destination,)
241
+
242
def tokenize(
    self,
    text: str,
    allowed_special: Union[Set, str] = "all",
    disallowed_special: Union[Collection, str] = (),
    **kwargs,
) -> List[Union[bytes, str]]:
    """
    Converts a string in a sequence of tokens.

    The text is NFC-normalized, encoded to ids with tiktoken, and the ids are
    mapped back to their surface forms. Every span enclosed by the image
    start/end tags is then rewritten: the bytes between the tags are mapped
    one by one through the decoder table and padded with the image pad tag
    to exactly ``IMG_TOKEN_SPAN`` entries.

    Args:
        text (`str`):
            The sequence to be encoded.
        allowed_special (`Literal["all"]` or `set`):
            The surface forms of the tokens to be encoded as special tokens in regular texts.
            Default to "all".
        disallowed_special (`Literal["all"]` or `Collection`):
            The surface forms of the tokens that should not be in regular texts and trigger errors.
            Default to an empty tuple.

        kwargs (additional keyword arguments, *optional*):
            Accepted for interface compatibility; not used by this method.

    Returns:
        `List[bytes|str]`: The list of tokens.

    Raises:
        ValueError: If the content between the image tags exceeds
            ``IMG_TOKEN_SPAN`` bytes, or an image start tag is never closed.
    """
    normalized = unicodedata.normalize("NFC", text)

    # this implementation takes a detour: text -> token id -> token surface forms
    token_ids = self.tokenizer.encode(
        normalized, allowed_special=allowed_special, disallowed_special=disallowed_special
    )
    tokens = [self.decoder[token_id] for token_id in token_ids]

    def _encode_imgurl(img_tokens):
        assert img_tokens[0] == self.image_start_tag and img_tokens[-1] == self.image_end_tag
        url_bytes = b''.join(img_tokens[1:-1])
        # Iterating bytes yields ints, so each byte value is used as a key
        # into the decoder table.
        url_tokens = [self.decoder.get(byte_value) for byte_value in url_bytes]
        padding = IMG_TOKEN_SPAN - len(url_tokens)
        if padding < 0:
            raise ValueError("The content in {}..{} is too long".format(
                self.image_start_tag, self.image_end_tag))
        return (
            [self.image_start_tag]
            + url_tokens
            + [self.image_pad_tag] * padding
            + [self.image_end_tag]
        )

    return _replace_closed_tag(tokens, self.image_start_tag, self.image_end_tag, _encode_imgurl)
290
+
291
def convert_tokens_to_string(self, tokens: List[Union[bytes, str]]) -> str:
    """
    Join a sequence of tokens into a single string.

    Consecutive ``bytes`` tokens are concatenated and decoded together as
    UTF-8 (using ``self.errors`` as the error policy), so a character whose
    bytes are split across several tokens is decoded correctly. ``str``
    tokens are inserted verbatim.

    Args:
        tokens: Tokens to join; each must be ``bytes`` or ``str``.

    Returns:
        The decoded text.

    Raises:
        TypeError: if a token is neither ``bytes`` nor ``str``.
    """
    pieces = []
    pending = []  # run of byte tokens waiting to be decoded as one unit

    def _flush():
        if pending:
            pieces.append(b"".join(pending).decode("utf-8", errors=self.errors))
            pending.clear()

    for token in tokens:
        if isinstance(token, str):
            _flush()
            pieces.append(token)
        elif isinstance(token, bytes):
            pending.append(token)
        else:
            raise TypeError("token should only be of type bytes or str")
    _flush()
    return "".join(pieces)
310
+
311
@property
def vocab_size(self):
    """Vocabulary size, read from the ``n_vocab`` attribute of ``self.tokenizer``."""
    underlying = self.tokenizer
    return underlying.n_vocab
314
+
315
+ def _convert_id_to_token(self, index: int) -> Union[bytes, str]:
316
+ """Converts an id to a token, special tokens included"""
317
+ if index in self.decoder:
318
+ return self.decoder[index]
319
+ raise ValueError("unknown ids")
320
+
321
+ def _convert_token_to_id(self, token: Union[bytes, str]) -> int:
322
+ """Converts a token to an id using the vocab, special tokens included"""
323
+ if token in self.special_tokens:
324
+ return self.special_tokens[token]
325
+ if token in self.mergeable_ranks:
326
+ return self.mergeable_ranks[token]
327
+ raise ValueError("unknown token")
328
+
329
+ def _tokenize(self, text: str, **kwargs):
330
+ """
331
+ Converts a string in a sequence of tokens (string), using the tokenizer. Split in words for word-based
332
+ vocabulary or sub-words for sub-word-based vocabularies (BPE/SentencePieces/WordPieces).
333
+
334
+ Do NOT take care of added tokens.
335
+ """
336
+ raise NotImplementedError
337
+
338
def _decode(
    self,
    token_ids: Union[int, List[int]],
    skip_special_tokens: bool = False,
    errors: str = None,
    **kwargs,
) -> str:
    """
    Decode token ids back into text.

    Image spans (``self.img_start_id`` ... ``self.img_end_id``) hold the
    image URL as one id per byte followed by ``self.img_pad_id`` padding;
    they are converted back to the ordinary encoding of the URL before the
    whole sequence is handed to ``self.tokenizer.decode``.

    Args:
        token_ids: A single id or a list of ids.
        skip_special_tokens: When true, ids that are not below
            ``self.eod_id`` are dropped before decoding.
        errors: Error policy for decoding; falls back to ``self.errors``
            when falsy.
        kwargs: Accepted for signature compatibility; unused here.

    Returns:
        The decoded string.
    """
    if isinstance(token_ids, int):
        token_ids = [token_ids]

    def _decode_imgurl(img_token_ids):
        assert img_token_ids[0] == self.img_start_id and img_token_ids[-1] == self.img_end_id
        payload = img_token_ids[1:-1]
        # A URL that fills the whole span carries no padding at all, so an
        # unconditional .index() would raise ValueError; only cut when a
        # pad id is actually present.
        if self.img_pad_id in payload:
            payload = payload[: payload.index(self.img_pad_id)]
        img_url = bytes(payload).decode('utf-8')
        return [self.img_start_id] + self.tokenizer.encode(img_url) + [self.img_end_id]

    # NOTE(review): _replace_closed_tag is defined elsewhere in this file;
    # presumably it applies _decode_imgurl to each closed image span.
    token_ids = _replace_closed_tag(token_ids, self.img_start_id, self.img_end_id, _decode_imgurl)

    if skip_special_tokens:
        token_ids = [i for i in token_ids if i < self.eod_id]
    return self.tokenizer.decode(token_ids, errors=errors or self.errors)
360
+
361
def to_list_format(self, text: str):
    """
    Split ``text`` into a list of single-key dicts.

    Content wrapped in image / ref / box / quad tags becomes
    ``{'image': ...}``, ``{'ref': ...}``, ``{'box': ...}`` or
    ``{'quad': ...}`` (tags stripped); any other run of tokens becomes
    ``{'text': ...}``.
    """
    normalized = unicodedata.normalize("NFC", text)
    token_ids = self.tokenizer.encode(
        normalized, allowed_special=set(self.IMAGE_ST + (ENDOFTEXT,)))

    start_ids = (self.img_start_id, self.ref_start_id, self.box_start_id, self.quad_start_id)
    end_ids = (self.img_end_id, self.ref_end_id, self.box_end_id, self.quad_end_id)
    keys = ('image', 'ref', 'box', 'quad')

    def _surface(ids):
        # ids -> surface forms -> one UTF-8 string (str tokens are encoded
        # first so that everything can be joined as bytes).
        chunks = []
        for piece in map(self.decoder.get, ids):
            chunks.append(piece.encode('utf-8') if isinstance(piece, str) else piece)
        return b''.join(chunks).decode('utf-8')

    def _encode_vl_info(tokens):
        if len(tokens) == 0:
            return []
        for key, start_id, end_id in zip(keys, start_ids, end_ids):
            if tokens[0] == start_id and tokens[-1] == end_id:
                return [{key: _surface(tokens[1:-1])}]
        return [{'text': _surface(tokens)}]

    # NOTE(review): _replace_closed_tag is defined elsewhere in this file;
    # the same callback is passed for both of its function arguments.
    return _replace_closed_tag(
        token_ids,
        start_ids,
        end_ids,
        _encode_vl_info,
        _encode_vl_info,
    )
392
+
393
def from_list_format(self, list_format: List[Dict]):
    """
    Render a list of element dicts as one tagged prompt string.

    * ``{'image': url}`` -> ``Picture N:`` + image tags around ``url`` + newline,
      where N counts the images seen so far (starting at 1).
    * ``{'text': s}`` -> ``s`` verbatim.
    * ``{'box': [...], 'ref': s?}`` -> optional ref tags around ``s``, then one
      box tag per entry formatted as ``(x1,y1),(x2,y2)``.

    Raises:
        ValueError: for an element with none of the keys above.
    """
    pieces = []
    image_count = 0
    for ele in list_format:
        if 'image' in ele:
            image_count += 1
            pieces.append(f'Picture {image_count}:')
            pieces.append(self.image_start_tag + ele['image'] + self.image_end_tag)
            pieces.append('\n')
        elif 'text' in ele:
            pieces.append(ele['text'])
        elif 'box' in ele:
            if 'ref' in ele:
                pieces.append(self.ref_start_tag + ele['ref'] + self.ref_end_tag)
            pieces.extend(
                self.box_start_tag + '(%d,%d),(%d,%d)' % (box[0], box[1], box[2], box[3]) + self.box_end_tag
                for box in ele['box']
            )
        else:
            raise ValueError("Unsupport element: " + str(ele))
    return ''.join(pieces)
413
+
414
+ def _fetch_latest_picture(self, response, history):
415
+ if history is None:
416
+ history = []
417
+ _history = history + [(response, None)]
418
+ for q, r in _history[::-1]:
419
+ for ele in self.to_list_format(q)[::-1]:
420
+ if 'image' in ele:
421
+ return ele['image']
422
+ return None
423
+
424
+ def _fetch_all_box_with_ref(self, text):
425
+ list_format = self.to_list_format(text)
426
+ output = []
427
+ for i, ele in enumerate(list_format):
428
+ if 'box' in ele:
429
+ bbox = tuple(map(int, ele['box'].replace('(', '').replace(')', '').split(',')))
430
+ assert len(bbox) == 4
431
+ output.append({'box': bbox})
432
+ if i > 0 and 'ref' in list_format[i-1]:
433
+ output[-1]['ref'] = list_format[i-1]['ref'].strip()
434
+ return output
435
+
436
def draw_bbox_on_latest_picture(
    self,
    response,
    history=None,
) -> Optional[Image.Image]:
    """
    Draw the boxes mentioned in ``response`` on the latest picture of the chat.

    The picture is the one returned by ``_fetch_latest_picture``; it is
    fetched over HTTP(S) when it looks like a URL and opened from disk
    otherwise. Box coordinates are divided by 1000 and multiplied by the
    image width/height before drawing.

    Args:
        response: Text to extract the boxes (and referring phrases) from.
        history: Earlier turns, forwarded to ``_fetch_latest_picture``.

    Returns:
        ``None`` when there is no picture or no box. Otherwise
        ``visualizer.output`` -- NOTE(review): that is the ``VisImage`` built
        by ``Visualizer``, not a PIL image as the annotation suggests; the
        annotation is kept unchanged for compatibility.
    """
    image = self._fetch_latest_picture(response, history)
    if image is None:
        return None

    # convert() returns a fully loaded copy, so the underlying response /
    # file handle can be released as soon as it is done.
    if image.startswith(("http://", "https://")):
        with requests.get(image, stream=True) as http_response:
            image = Image.open(http_response.raw).convert("RGB")
    else:
        with Image.open(image) as opened:
            image = opened.convert("RGB")
    w, h = image.size

    boxes = self._fetch_all_box_with_ref(response)
    if not boxes:
        return None

    # Only build the (matplotlib-backed) visualizer once there is something to draw.
    visualizer = Visualizer(image)
    palette = list(mcolors.TABLEAU_COLORS)
    color = random.choice(palette)  # init color
    for box in boxes:
        has_ref = 'ref' in box
        if has_ref:  # random new color for new refexps
            color = random.choice(palette)
        x1, y1, x2, y2 = box['box']
        x1, y1, x2, y2 = (int(x1 / 1000 * w), int(y1 / 1000 * h), int(x2 / 1000 * w), int(y2 / 1000 * h))
        visualizer.draw_box((x1, y1, x2, y2), alpha=1, edge_color=color)
        if has_ref:
            visualizer.draw_text(box['ref'], (x1, y1), color=color, horizontal_alignment="left")
    return visualizer.output
465
+
466
+
467
+ import colorsys
468
+ import logging
469
+ import math
470
+ import numpy as np
471
+ import matplotlib as mpl
472
+ import matplotlib.colors as mplc
473
+ import matplotlib.figure as mplfigure
474
+ import torch
475
+ from matplotlib.backends.backend_agg import FigureCanvasAgg
476
+ from PIL import Image
477
+ import random
478
+
479
+ logger = logging.getLogger(__name__)
480
+
481
+
482
class VisImage:
    """An image shown on a borderless matplotlib figure that can be drawn on and exported."""

    def __init__(self, img, scale=1.0):
        """
        Args:
            img: Array whose first two dimensions are (height, width).
            scale: Factor applied to the figure size.
        """
        self.img = img
        self.scale = scale
        rows, cols = img.shape[0], img.shape[1]
        self.width, self.height = cols, rows
        self._setup_figure(img)

    def _setup_figure(self, img):
        """Create the figure, canvas and full-size axes, then display ``img``."""
        fig = mplfigure.Figure(frameon=False)
        self.dpi = fig.get_dpi()
        # add a small 1e-2 to avoid precision lost due to matplotlib's truncation
        # (https://github.com/matplotlib/matplotlib/issues/15363)
        width_inches = (self.width * self.scale + 1e-2) / self.dpi
        height_inches = (self.height * self.scale + 1e-2) / self.dpi
        fig.set_size_inches(width_inches, height_inches)
        self.canvas = FigureCanvasAgg(fig)
        ax = fig.add_axes([0.0, 0.0, 1.0, 1.0])
        ax.axis("off")
        self.fig = fig
        self.ax = ax
        self.reset_image(img)

    def reset_image(self, img):
        """Show ``img`` (cast to uint8) on the axes, in pixel coordinates."""
        as_uint8 = img.astype("uint8")
        self.ax.imshow(
            as_uint8,
            extent=(0, self.width, self.height, 0),
            interpolation="nearest",
        )

    def save(self, filepath):
        """Write the figure to ``filepath`` via ``Figure.savefig``."""
        self.fig.savefig(filepath)

    def get_image(self):
        """Render the canvas and return its RGB channels as a uint8 array (alpha dropped)."""
        raw, (width, height) = self.canvas.print_to_buffer()
        img_rgba = np.frombuffer(raw, dtype="uint8").reshape(height, width, 4)
        return img_rgba[:, :, :3].astype("uint8")
522
+
523
+
524
class Visualizer:
    """Draws text labels and boxes on top of an image held in a ``VisImage``."""

    def __init__(self, img_rgb, metadata=None, scale=1.0):
        """
        Args:
            img_rgb: Image data; converted to an array, clipped to 0..255 and cast to uint8.
            metadata: Not used by this class.
            scale: Forwarded to ``VisImage`` and used to scale font and line sizes.
        """
        self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8)
        self.font_path = FONT_PATH
        self.output = VisImage(self.img, scale=scale)
        self.cpu_device = torch.device("cpu")

        # Font size grows with the image; too small texts are useless, so
        # never go below 15 // scale.
        size_from_area = np.sqrt(self.output.height * self.output.width) // 30
        self._default_font_size = max(size_from_area, 15 // scale)

    def draw_text(
        self,
        text,
        position,
        *,
        font_size=None,
        color="g",
        horizontal_alignment="center",
        rotation=0,
    ):
        """
        Write ``text`` at ``position`` (x, y) on a dark translucent background.

        ``font_size`` falls back to the default size when falsy. Returns the
        ``VisImage`` being drawn on.
        """
        size = font_size if font_size else self._default_font_size

        # since the text background is dark, we don't want the text to be dark
        rgb = np.maximum(list(mplc.to_rgb(color)), 0.2)
        rgb[np.argmax(rgb)] = max(0.8, np.max(rgb))

        x, y = position
        background = {"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}
        self.output.ax.text(
            x,
            y,
            text,
            size=size * self.output.scale,
            fontproperties=FontProperties(fname=self.font_path),
            bbox=background,
            verticalalignment="top",
            horizontalalignment=horizontal_alignment,
            color=rgb,
            zorder=10,
            rotation=rotation,
        )
        return self.output

    def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"):
        """
        Draw an unfilled rectangle given as ``(x0, y0, x1, y1)``.

        Returns the ``VisImage`` being drawn on.
        """
        x0, y0, x1, y1 = box_coord
        linewidth = max(self._default_font_size / 4, 1)
        rectangle = mpl.patches.Rectangle(
            (x0, y0),
            x1 - x0,
            y1 - y0,
            fill=False,
            edgecolor=edge_color,
            linewidth=linewidth * self.output.scale,
            alpha=alpha,
            linestyle=line_style,
        )
        self.output.ax.add_patch(rectangle)
        return self.output

    def get_output(self):
        """Return the ``VisImage`` holding everything drawn so far."""
        return self.output
visual.py ADDED
@@ -0,0 +1,482 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba Cloud.
2
+ #
3
+ # This source code is licensed under the license found in the
4
+ # LICENSE file in the root directory of this source tree.
5
+
6
+ from collections import OrderedDict
7
+ import math
8
+ import requests
9
+ from io import BytesIO
10
+ from functools import partial
11
+ from PIL import Image
12
+ from typing import Callable, Optional, Sequence, Tuple, List
13
+ import numpy as np
14
+ import torch
15
+ from torch import nn
16
+ from torch.nn import functional as F
17
+ from torch.nn.init import trunc_normal_
18
+ from torchvision import transforms
19
+ from torchvision.transforms import InterpolationMode
20
+ import pdb
21
+
22
def sliding_window(matrix, window_size, stride):
    """
    Cut a batch of images into tiles and append a downsized global view.

    Args:
        matrix: Tensor of shape (b, c, height, width).
        window_size: ``(window_height, window_width)`` of every tile.
        stride: Step between tile origins, used for both axes.

    Returns:
        Tensor of shape (b * n, c, window_height, window_width) with
        ``n = rows * cols + 1``. For each image the tiles come first in
        row-major order, followed by the whole image resized to
        ``window_size`` with bicubic interpolation; the ``n`` entries of one
        image are consecutive along dim 0.
    """
    b, c, height, width = matrix.shape
    win_h, win_w = window_size
    window_rows = (height - win_h) // stride + 1
    window_cols = (width - win_w) // stride + 1

    windows = [
        matrix[:, :, i * stride:i * stride + win_h, j * stride:j * stride + win_w]
        for i in range(window_rows)
        for j in range(window_cols)
    ]
    windows.append(F.interpolate(matrix, size=(win_h, win_w), mode='bicubic'))

    # (b, n, c, h, w) -> (b * n, c, h, w); n is derived from the actual tile
    # count rather than assumed to be 5, and width uses win_w, not win_h.
    return torch.stack(windows, dim=1).reshape(b * len(windows), c, win_h, win_w)
40
+
41
+
42
def get_abs_pos(abs_pos, tgt_size):
    """
    Resize a square grid of position embeddings to a new number of positions.

    Args:
        abs_pos: Tensor of shape (L, C); L is treated as a square grid of
            side ``int(sqrt(L))``.
        tgt_size: Target number of positions M; the target grid side is
            ``int(sqrt(M))``.

    Returns:
        ``abs_pos`` itself when both sides are equal, otherwise a
        (side * side, C) tensor obtained by bicubic interpolation (computed
        in float32 and cast back to the input dtype).
    """
    src_side = int(math.sqrt(abs_pos.size(0)))
    tgt_side = int(math.sqrt(tgt_size))
    if src_side == tgt_side:
        return abs_pos

    original_dtype = abs_pos.dtype
    # (L, C) -> (1, C, side, side), the layout F.interpolate expects.
    as_image = abs_pos.float().reshape(1, src_side, src_side, -1).permute(0, 3, 1, 2)
    resized = F.interpolate(
        as_image,
        size=(tgt_side, tgt_side),
        mode="bicubic",
        align_corners=False,
    )
    return resized.permute(0, 2, 3, 1).flatten(0, 2).to(dtype=original_dtype)
59
+
60
+ # https://github.com/facebookresearch/mae/blob/efb2a8062c206524e35e47d04501ed4f544c0ae8/util/pos_embed.py#L20
61
def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False):
    """
    Build fixed 2D sine/cosine position embeddings for a square grid.

    grid_size: int of the grid height and width
    return:
    pos_embed: [grid_size*grid_size, embed_dim] without cls_token, or
        [1+grid_size*grid_size, embed_dim] with cls_token (a zero row is
        prepended)
    """
    axis = np.arange(grid_size, dtype=np.float32)
    # meshgrid receives the width axis first, as in the MAE reference code.
    grid = np.stack(np.meshgrid(axis, axis), axis=0)
    grid = grid.reshape([2, 1, grid_size, grid_size])

    pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid)
    if not cls_token:
        return pos_embed
    return np.concatenate([np.zeros([1, embed_dim]), pos_embed], axis=0)
77
+
78
+
79
def get_2d_sincos_pos_embed_from_grid(embed_dim, grid):
    """
    Encode a two-channel coordinate grid as sine/cosine embeddings.

    Half of ``embed_dim`` encodes ``grid[0]`` and the other half encodes
    ``grid[1]``; the two halves are concatenated along the feature axis,
    giving an (H*W, embed_dim) array. ``embed_dim`` must be even.
    """
    assert embed_dim % 2 == 0

    per_axis_dim = embed_dim // 2
    halves = [
        get_1d_sincos_pos_embed_from_grid(per_axis_dim, grid[channel])  # (H*W, D/2)
        for channel in (0, 1)
    ]
    return np.concatenate(halves, axis=1)  # (H*W, D)
88
+
89
+
90
def get_1d_sincos_pos_embed_from_grid(embed_dim, pos):
    """
    Sine/cosine embedding of a set of scalar positions.

    embed_dim: output dimension for each position (must be even)
    pos: array of positions to be encoded; flattened to size (M,)
    out: (M, D) -- the first D/2 columns are sines, the last D/2 cosines,
        using frequencies 1 / 10000**(k / (D/2)) for k = 0 .. D/2 - 1
    """
    assert embed_dim % 2 == 0
    omega = np.arange(embed_dim // 2, dtype=np.float32)
    omega /= embed_dim / 2.
    omega = 1. / 10000**omega  # (D/2,)

    flat_pos = pos.reshape(-1)  # (M,)
    # Outer product via broadcasting: (M, 1) * (1, D/2) -> (M, D/2)
    angles = flat_pos[:, None] * omega[None, :]

    return np.concatenate([np.sin(angles), np.cos(angles)], axis=1)  # (M, D)
109
+
110
+
111
class Resampler(nn.Module):
    """
    A 2D perceiver-resampler: a single cross-attention layer in which
    (grid_size**2) learnable queries, tagged with fixed 2D sin/cos position
    embeddings, attend over the input features.

    Outputs:
        A tensor of shape (batch, grid_size**2, embed_dim).
    """

    def __init__(
        self,
        grid_size,
        embed_dim,
        num_heads,
        kv_dim=None,
        norm_layer=nn.LayerNorm
    ):
        super().__init__()
        self.num_queries = grid_size ** 2
        self.embed_dim = embed_dim
        self.num_heads = num_heads

        # Fixed (non-trainable) position table for the queries.
        sincos_table = get_2d_sincos_pos_embed(embed_dim, grid_size)
        self.pos_embed = nn.Parameter(
            torch.from_numpy(sincos_table).float()
        ).requires_grad_(False)

        self.query = nn.Parameter(torch.zeros(self.num_queries, embed_dim))
        trunc_normal_(self.query, std=.02)

        # Project the inputs only when their width differs from embed_dim.
        needs_projection = kv_dim is not None and kv_dim != embed_dim
        if needs_projection:
            self.kv_proj = nn.Linear(kv_dim, embed_dim, bias=False)
        else:
            self.kv_proj = nn.Identity()

        self.attn = nn.MultiheadAttention(embed_dim, num_heads)
        self.ln_q = norm_layer(embed_dim)
        self.ln_kv = norm_layer(embed_dim)

        self.apply(self._init_weights)

    def _init_weights(self, m):
        """Truncated-normal init for Linear weights (zero bias); unit/zero init for LayerNorm."""
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight, std=.02)
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        elif isinstance(m, nn.LayerNorm):
            nn.init.constant_(m.bias, 0)
            nn.init.constant_(m.weight, 1.0)

    def forward(self, x, attn_mask=None):
        """
        Args:
            x: Input features, indexed as (batch, length, dim) by the code below.
            attn_mask: Optional mask handed to ``nn.MultiheadAttention``.

        Returns:
            Tensor of shape (batch, num_queries, embed_dim).
        """
        # Position table resized to the input length (added to the keys only).
        key_pos = get_abs_pos(self.pos_embed, x.size(1))

        # (batch, length, dim) -> (length, batch, embed_dim)
        kv = self.ln_kv(self.kv_proj(x)).permute(1, 0, 2)
        batch = kv.shape[1]

        queries = self._repeat(self.ln_q(self.query), batch) + self.pos_embed.unsqueeze(1)
        keys = kv + key_pos.unsqueeze(1)
        attended = self.attn(queries, keys, kv, attn_mask=attn_mask)[0]
        return attended.permute(1, 0, 2)

    def _repeat(self, query, N: int):
        """Copy ``query`` N times along a new second dimension."""
        return query.unsqueeze(1).repeat(1, N, 1)
180
+
181
+
182
class VisualAttention(nn.Module):
    """Multi-head self-attention layer.

    Takes input of size [s, b, h] and returns output of the same size.
    Only self-attention is supported: ``query`` and ``key`` must be
    (numerically) equal, and kdim/vdim must equal embed_dim.
    """

    def __init__(self, embed_dim, num_heads,
                 bias=True, kdim=None, vdim=None):
        super().__init__()
        self.embed_dim = embed_dim
        self.kdim = embed_dim if kdim is None else kdim
        self.vdim = embed_dim if vdim is None else vdim
        self._qkv_same_embed_dim = self.kdim == embed_dim and self.vdim == embed_dim

        self.num_heads = num_heads

        # Per attention head and per partition values.
        assert embed_dim % num_heads == 0
        self.hidden_size_per_attention_head = embed_dim // num_heads
        self.num_attention_heads_per_partition = num_heads
        self.hidden_size_per_partition = embed_dim

        # One fused projection producing q, k and v, plus the output projection.
        assert self._qkv_same_embed_dim, 'Only Support SelfAttention Currently'
        self.in_proj = nn.Linear(embed_dim, 3 * embed_dim)
        self.out_proj = nn.Linear(embed_dim, embed_dim)
        self.norm_factor = math.sqrt(self.hidden_size_per_attention_head)

    def forward(self, query, key, value, attn_mask = None):
        """
        Args:
            query, key, value: Tensors of shape [sq, b, h]; q, k and v are
                all derived from ``query``.
            attn_mask: Optional tensor added to the attention scores before
                the softmax (through ``torch.baddbmm``).

        Returns:
            Tensor of shape [sq, b, h].
        """
        sq, b, _ = query.size()

        assert torch.allclose(query, key), 'Only Support Self-Attention Currently'
        sk = sq
        heads = self.num_attention_heads_per_partition
        head_dim = self.hidden_size_per_attention_head

        # [sq, b, (np * 3 * hn)] --> [sq, b, np, 3 * hn]
        mixed = self.in_proj(query)
        mixed = mixed.view(*mixed.size()[:-1], heads, 3 * head_dim)

        # [sq, b, np, 3 * hn] --> 3 [sq, b, np, hn]
        q, k, v = mixed.split(head_dim, dim=-1)

        def _fold_heads(tensor, seq_len):
            # [s, b, np, hn] -> [b * np, s, hn]
            return tensor.view(seq_len, b * heads, head_dim).transpose(0, 1)

        q = _fold_heads(q, sq)
        k = _fold_heads(k, sk)

        q_scaled = q / self.norm_factor
        k_t = k.transpose(-2, -1)
        if attn_mask is None:
            scores = torch.bmm(q_scaled, k_t)
        else:
            scores = torch.baddbmm(attn_mask, q_scaled, k_t)
        attention_probs = scores.softmax(dim=-1)

        v = _fold_heads(v, sk)

        # matmul: [b * np, sq, hn]
        context = torch.bmm(attention_probs, v)

        # [b * np, sq, hn] -> [b, np, sq, hn] -> [sq, b, np, hn]
        context = context.view(b, heads, sq, head_dim).permute(2, 0, 1, 3).contiguous()

        # [sq, b, np, hn] --> [sq, b, hp]
        context = context.view(*context.size()[:-2], self.hidden_size_per_partition)

        return self.out_proj(context)
268
+
269
+
270
class VisualAttentionBlock(nn.Module):
    """Pre-norm transformer block: attention and MLP sub-layers, each with a residual connection."""

    def __init__(
        self,
        d_model: int,
        n_head: int,
        mlp_ratio: float = 4.0,
        act_layer: Callable = nn.GELU,
        norm_layer: Callable = nn.LayerNorm,
        is_cross_attention: bool = False,
    ):
        super().__init__()

        self.ln_1 = norm_layer(d_model)
        if is_cross_attention:
            # Separate norm for externally supplied keys/values.
            self.ln_1_kv = norm_layer(d_model)

        self.ln_2 = norm_layer(d_model)
        mlp_width = int(d_model * mlp_ratio)
        self.attn = VisualAttention(d_model, n_head)
        mlp_layers = OrderedDict()
        mlp_layers["c_fc"] = nn.Linear(d_model, mlp_width)
        mlp_layers["gelu"] = act_layer()
        mlp_layers["c_proj"] = nn.Linear(mlp_width, d_model)
        self.mlp = nn.Sequential(mlp_layers)

    def attention(
        self,
        q_x: torch.Tensor,
        k_x: Optional[torch.Tensor] = None,
        v_x: Optional[torch.Tensor] = None,
        attn_mask: Optional[torch.Tensor] = None,
    ):
        """Run ``self.attn``; missing keys/values default to ``q_x`` and the mask is cast to ``q_x.dtype``."""
        if k_x is None:
            k_x = q_x
        if v_x is None:
            v_x = q_x
        if attn_mask is not None:
            attn_mask = attn_mask.to(q_x.dtype)
        return self.attn(q_x, k_x, v_x, attn_mask=attn_mask)

    def forward(
        self,
        q_x: torch.Tensor,
        k_x: Optional[torch.Tensor] = None,
        v_x: Optional[torch.Tensor] = None,
        attn_mask: Optional[torch.Tensor] = None,
    ):
        """
        Apply the block to ``q_x``.

        ``k_x`` / ``v_x`` are only used (after ``ln_1_kv``) when the block was
        built with ``is_cross_attention=True``; otherwise they are discarded
        and the block attends over ``q_x`` itself.
        """
        has_kv_norm = hasattr(self, "ln_1_kv")
        k_x = self.ln_1_kv(k_x) if has_kv_norm and k_x is not None else None
        v_x = self.ln_1_kv(v_x) if has_kv_norm and v_x is not None else None

        attended = self.attention(q_x=self.ln_1(q_x), k_x=k_x, v_x=v_x, attn_mask=attn_mask)
        x = q_x + attended
        return x + self.mlp(self.ln_2(x))
321
+
322
+
323
class TransformerBlock(nn.Module):
    """A stack of ``layers`` VisualAttentionBlock modules applied one after another."""

    def __init__(
        self,
        width: int,
        layers: int,
        heads: int,
        mlp_ratio: float = 4.0,
        act_layer: Callable = nn.GELU,
        norm_layer: Callable = nn.LayerNorm,
    ):
        super().__init__()
        self.width = width
        self.layers = layers

        blocks = []
        for _ in range(layers):
            blocks.append(
                VisualAttentionBlock(
                    width, heads, mlp_ratio, act_layer=act_layer, norm_layer=norm_layer)
            )
        self.resblocks = nn.ModuleList(blocks)

    def _reference_weight(self) -> torch.Tensor:
        # The first MLP weight of the first block stands in for the whole stack.
        return self.resblocks[0].mlp.c_fc.weight

    def get_cast_dtype(self) -> torch.dtype:
        """dtype of the first block's first MLP weight."""
        return self._reference_weight().dtype

    def get_cast_device(self) -> torch.device:
        """Device of the first block's first MLP weight."""
        return self._reference_weight().device

    def forward(self, x: torch.Tensor, attn_mask: Optional[torch.Tensor] = None):
        """Feed ``x`` through every block in order, passing the same ``attn_mask`` to each."""
        hidden = x
        for block in self.resblocks:
            hidden = block(hidden, attn_mask=attn_mask)
        return hidden
353
+
354
+
355
class VisionTransformer(nn.Module):
    """
    Vision tower: patch embedding, transformer stack and two resamplers.

    ``forward`` expects the batch in groups of five consecutive entries per
    image, as ``encode`` produces through ``sliding_window``: four tiles
    followed by a downsized view of the whole image. The tile features are
    pooled by ``attn_pool2`` and the global-view features by ``attn_pool``.
    """

    def __init__(
        self,
        image_size: int,
        patch_size: int,
        width: int,
        layers: int,
        heads: int,
        mlp_ratio: float,
        n_queries: int = 256,
        output_dim: int = 512,
        **kwargs
    ):
        super().__init__()
        image_height, image_width = self.image_size = (image_size, image_size)
        patch_height, patch_width = self.patch_size = (patch_size, patch_size)
        self.grid_size = (image_height // patch_height, image_width // patch_width)
        self.output_dim = output_dim

        mean = (0.48145466, 0.4578275, 0.40821073)
        std = (0.26862954, 0.26130258, 0.27577711)
        # Images are resized to twice the tile size so that they can be cut
        # into a 2 x 2 grid of image_size tiles.
        self.image_transform = transforms.Compose([
            transforms.Resize(
                (image_size*2, image_size*2),
                interpolation=InterpolationMode.BICUBIC
            ),
            transforms.ToTensor(),
            transforms.Normalize(mean=mean, std=std),
        ])

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=width, kernel_size=patch_size, stride=patch_size, bias=False)

        # Positional embeddings for a 256-entry table; get_abs_pos resizes
        # it in forward() when the actual number of patches differs.
        scale = width ** -0.5
        self.positional_embedding = nn.Parameter(scale * torch.randn(256, width))

        norm_layer = partial(nn.LayerNorm, eps=1e-6)
        act_layer = nn.GELU

        self.ln_pre = norm_layer(width)
        self.transformer = TransformerBlock(
            width,
            layers,
            heads,
            mlp_ratio,
            act_layer=act_layer,
            norm_layer=norm_layer,
        )
        # attn_pool: global view; attn_pool2: stitched tiles (see forward()).
        self.attn_pool = Resampler(
            grid_size=int(math.sqrt(n_queries)),
            embed_dim=output_dim,
            num_heads=output_dim // 128,
            kv_dim=width,
            norm_layer=norm_layer,
        )
        self.attn_pool2 = Resampler(
            grid_size=int(math.sqrt(n_queries)),
            embed_dim=output_dim,
            num_heads=output_dim // 128,
            kv_dim=width,
            norm_layer=norm_layer,
        )
        self.ln_post = norm_layer(output_dim)
        self.proj = nn.Parameter((output_dim** -0.5) * torch.randn(output_dim, output_dim))

    def forward(self, x: torch.Tensor):
        """
        Args:
            x: Image tensor whose batch size is a multiple of 5 (four tiles
                plus one global view per image, in that order).

        Returns:
            Pooled tile features concatenated with pooled global-view
            features along dim 1.
        """
        x = x.to(
            dtype=self.transformer.get_cast_dtype(),
            device=self.transformer.get_cast_device(),
        )
        # to patches
        x = self.conv1(x)  # shape = [*, width, grid, grid]
        x = x.reshape(x.shape[0], x.shape[1], -1)  # shape = [*, width, grid ** 2]
        x = x.permute(0, 2, 1)  # shape = [*, grid ** 2, width]

        x = x + get_abs_pos(self.positional_embedding, x.size(1))

        x = self.ln_pre(x)

        x = x.permute(1, 0, 2)  # NLD -> LND
        x = self.transformer(x)
        x = x.permute(1, 0, 2)  # LND -> NLD

        src_size = int(math.sqrt(x.shape[1]))
        # Regroup the batch as [images, 5, tokens, width].
        x = x.reshape(x.shape[0]//5,5,-1, x.shape[-1])
        x1 = x[:,4,:,:]  # global view
        x = x[:,:4,:,:]  # the four tiles
        # Stitch the four per-tile token grids into one (2*src_size) x
        # (2*src_size) grid, flattened row-major.
        x = x.reshape(x.shape[0], -1, src_size, src_size, x.shape[-1])
        x = x.transpose(1,2).reshape(x.shape[0], src_size,2,2, src_size, x.shape[-1])
        x = x.transpose(1,2).reshape(x.shape[0], -1, x.shape[-1])
        x = self.attn_pool2(x)
        x1 = self.attn_pool(x1)
        x = self.post_pro(x)
        x1 = self.post_pro(x1)
        return torch.cat([x,x1],dim=1)

    def post_pro(self, x):
        """Apply the final layer norm followed by the output projection matrix."""
        x = self.ln_post(x)
        x = x @ self.proj
        return x

    def encode(self, image_paths: List[str]):
        """
        Load, preprocess and encode a list of images.

        Args:
            image_paths: Local paths or http(s) URLs.

        Returns:
            The output of ``forward`` for the tiled batch. An image that
            cannot be loaded or transformed is replaced by an all-zero image
            so that the rest of the batch is still processed.
        """
        tile = self.image_size[0]
        images = []
        for image_path in image_paths:
            try:
                if image_path.startswith(("http://", "https://")):
                    source = requests.get(image_path, stream=True).raw
                else:
                    source = image_path
                # Both branches need the RGB conversion and the transform;
                # torch.stack below requires a tensor for every image.
                with Image.open(source) as opened:
                    image = self.image_transform(opened.convert("RGB"))
            except Exception:
                # Deliberate best effort: fall back to a blank image.
                image = torch.zeros((3, tile*2, tile*2))
            images.append(image)
        images = torch.stack(images, dim=0)
        windows = sliding_window(images,window_size=(tile,tile),stride=tile)
        return self(windows)
482
+