medxcribe committed on
Commit
547d3e3
·
1 Parent(s): 8f97bb2

Add ONNX models to /onnx folder

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "./whisper-base-en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
@@ -44,8 +44,7 @@
44
  "pad_token_id": 50256,
45
  "scale_embedding": false,
46
  "suppress_tokens": [],
47
- "torch_dtype": "float32",
48
- "transformers_version": "4.35.2",
49
  "use_cache": true,
50
  "use_weighted_layer_sum": false,
51
  "vocab_size": 51864
 
1
  {
2
+ "_name_or_path": "medxcribe/whisper-base.en",
3
  "activation_dropout": 0.0,
4
  "activation_function": "gelu",
5
  "apply_spec_augment": false,
 
44
  "pad_token_id": 50256,
45
  "scale_embedding": false,
46
  "suppress_tokens": [],
47
+ "transformers_version": "4.38.2",
 
48
  "use_cache": true,
49
  "use_weighted_layer_sum": false,
50
  "vocab_size": 51864
generation_config.json CHANGED
@@ -133,5 +133,6 @@
133
  50360,
134
  50361
135
  ],
136
- "transformers_version": "4.35.2"
 
137
  }
 
133
  50360,
134
  50361
135
  ],
136
+ "transformers_version": "4.38.2",
137
+ "trust_remote_code": false
138
  }
onnx/decoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4d3c2f7bb5aed65cd9637410c939e3fa6e971e829253a6e643e589c4588555ce
3
+ size 208280583
onnx/decoder_model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6eb7b18a5ae9811e7c9f31739880b3c8e99d9b8930307539d34f6aeef1c7744e
3
+ size 121782490
onnx/decoder_model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42eb919a59a7e7748d1dc860430472ba5dde3d55f4b7e594f49b68a41c307ccc
3
+ size 104279011
onnx/decoder_model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d05e1c9496b756e0495dbcd34b0abf011e538e477e4bb30b77010ec75a42c2a
3
+ size 159410870
onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97ac9796b79649560d4d037ec3e882570455a4ce2be33211087431a0f379f903
3
+ size 208593457
onnx/decoder_model_merged_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:662ce2e2fbd67bdc2b2db5eb26c57f128a8993e0c433139133db660b1d53f835
3
+ size 122102394
onnx/decoder_model_merged_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:396e7df8e8541e4790d22bc4a7471eed2bc99662cf6df3402f49cef061c2eace
3
+ size 104595806
onnx/decoder_model_merged_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17a0d08be5d62bcc4bf2a0acf9e007c4a26f103b668fd7afe64a82f7a8e0132c
3
+ size 159784439
onnx/decoder_model_merged_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5c88eea3e215bbb265478d5e6c3e802c6788077935a88db96ff9e0bb1fd4e86
3
+ size 123674348
onnx/decoder_model_merged_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17a0d08be5d62bcc4bf2a0acf9e007c4a26f103b668fd7afe64a82f7a8e0132c
3
+ size 159784439
onnx/decoder_model_merged_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7dac37e938a6cdf0b6588d05d50bcea1fd9a7fb756a789f0249af5ba10def8f1
3
+ size 159784400
onnx/decoder_model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb599c6b6674827738eee81c0f2cfbdfa83faf5adabe2e5cc02efd3142e13e54
3
+ size 123354874
onnx/decoder_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d05e1c9496b756e0495dbcd34b0abf011e538e477e4bb30b77010ec75a42c2a
3
+ size 159410870
onnx/decoder_model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92fe2ed1cbafa6cbd3f9d51cb6b421cc43ade6b1b438ba9923b7152ea3e7bd0d
3
+ size 159410903
onnx/decoder_with_past_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d22b5f31c08cf20ad8e2ead52ca4eb5a1c79a7dd4857c0bdba91914b7d721bd2
3
+ size 195675133
onnx/decoder_with_past_model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e56e44475cb06ce05ffe5d476e8a8f37a0e4db603e945d07aeb7aabe9ffb661
3
+ size 119988596
onnx/decoder_with_past_model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d70cd075b0d5e32932522dee9dde39a784bef298a46b56f528c17bc60a3a06a
3
+ size 97973240
onnx/decoder_with_past_model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adab4e4c9167e43b995e552f70721e6db0c66b44f8a54adb0e8660ad5b5a823d
3
+ size 156230721
onnx/decoder_with_past_model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b02579314508b58f0c04da8876b66e2e5145ca9117b364258ea48aee0de586f2
3
+ size 121364468
onnx/decoder_with_past_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adab4e4c9167e43b995e552f70721e6db0c66b44f8a54adb0e8660ad5b5a823d
3
+ size 156230721
onnx/decoder_with_past_model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:494567e281607fd66d5cd65b6b43585929d205a0a039dc796a6d5c89af70fc53
3
+ size 156230750
onnx/encoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:640d3fc87c5ae556254b6ad920392d0ca11a99e2e978f69846cf7d5df14910b8
3
+ size 82449726
onnx/encoder_model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7efe981387ad6809649bd3e2fdc7f0ecf037cdd22a24894c80d71ecc46b3dfe9
3
+ size 17574739
onnx/encoder_model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8eba79bbb2ead961abf6a7f86499d9c2c5c5f48abe0681bf5047eb5e573f5064
3
+ size 41268727
onnx/encoder_model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a84f3f55562139c34ad69a28bd4848d0cb15f3aadce98ec413a88db669963185
3
+ size 23137412
onnx/encoder_model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcd3dad2752b8b96aa23c8213fdceb09b6daf44ac451fbec6c5ce937c3900b7b
3
+ size 18754099
onnx/encoder_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46b75f5a5876ca67ebaf35cb8ed95b73410c8c476cd9c33a218df1c028afedcc
3
+ size 23137435
onnx/encoder_model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46b75f5a5876ca67ebaf35cb8ed95b73410c8c476cd9c33a218df1c028afedcc
3
+ size 23137435
quantize_config.json ADDED
@@ -0,0 +1,330 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fp16": {},
3
+ "q8": {
4
+ "per_model_config": {
5
+ "encoder_model": {
6
+ "op_types": [
7
+ "Add",
8
+ "Conv",
9
+ "Div",
10
+ "Erf",
11
+ "MatMul",
12
+ "Mul",
13
+ "Pow",
14
+ "ReduceMean",
15
+ "Reshape",
16
+ "Softmax",
17
+ "Sqrt",
18
+ "Sub",
19
+ "Transpose"
20
+ ],
21
+ "weight_type": "QUInt8"
22
+ },
23
+ "decoder_model": {
24
+ "op_types": [
25
+ "Add",
26
+ "Concat",
27
+ "ConstantOfShape",
28
+ "Div",
29
+ "Equal",
30
+ "Erf",
31
+ "Expand",
32
+ "Gather",
33
+ "Less",
34
+ "MatMul",
35
+ "Mul",
36
+ "Pow",
37
+ "Range",
38
+ "ReduceMean",
39
+ "Reshape",
40
+ "Shape",
41
+ "Slice",
42
+ "Softmax",
43
+ "Sqrt",
44
+ "Squeeze",
45
+ "Sub",
46
+ "Transpose",
47
+ "Unsqueeze",
48
+ "Where"
49
+ ],
50
+ "weight_type": "QInt8"
51
+ },
52
+ "decoder_with_past_model": {
53
+ "op_types": [
54
+ "Add",
55
+ "Concat",
56
+ "Div",
57
+ "Erf",
58
+ "Gather",
59
+ "MatMul",
60
+ "Mul",
61
+ "Pow",
62
+ "ReduceMean",
63
+ "Reshape",
64
+ "Shape",
65
+ "Slice",
66
+ "Softmax",
67
+ "Sqrt",
68
+ "Sub",
69
+ "Transpose",
70
+ "Unsqueeze"
71
+ ],
72
+ "weight_type": "QInt8"
73
+ },
74
+ "decoder_model_merged": {
75
+ "op_types": [
76
+ "Add",
77
+ "Concat",
78
+ "Constant",
79
+ "ConstantOfShape",
80
+ "Div",
81
+ "Equal",
82
+ "Erf",
83
+ "Expand",
84
+ "Gather",
85
+ "If",
86
+ "Less",
87
+ "MatMul",
88
+ "Mul",
89
+ "Pow",
90
+ "Range",
91
+ "ReduceMean",
92
+ "Reshape",
93
+ "Shape",
94
+ "Slice",
95
+ "Softmax",
96
+ "Sqrt",
97
+ "Squeeze",
98
+ "Sub",
99
+ "Transpose",
100
+ "Unsqueeze",
101
+ "Where"
102
+ ],
103
+ "weight_type": "QInt8"
104
+ }
105
+ },
106
+ "per_channel": false,
107
+ "reduce_range": false
108
+ },
109
+ "int8": {
110
+ "per_model_config": {
111
+ "encoder_model": {
112
+ "op_types": [
113
+ "Add",
114
+ "Conv",
115
+ "Div",
116
+ "Erf",
117
+ "MatMul",
118
+ "Mul",
119
+ "Pow",
120
+ "ReduceMean",
121
+ "Reshape",
122
+ "Softmax",
123
+ "Sqrt",
124
+ "Sub",
125
+ "Transpose"
126
+ ],
127
+ "weight_type": "QInt8"
128
+ },
129
+ "decoder_model": {
130
+ "op_types": [
131
+ "Add",
132
+ "Concat",
133
+ "ConstantOfShape",
134
+ "Div",
135
+ "Equal",
136
+ "Erf",
137
+ "Expand",
138
+ "Gather",
139
+ "Less",
140
+ "MatMul",
141
+ "Mul",
142
+ "Pow",
143
+ "Range",
144
+ "ReduceMean",
145
+ "Reshape",
146
+ "Shape",
147
+ "Slice",
148
+ "Softmax",
149
+ "Sqrt",
150
+ "Squeeze",
151
+ "Sub",
152
+ "Transpose",
153
+ "Unsqueeze",
154
+ "Where"
155
+ ],
156
+ "weight_type": "QInt8"
157
+ },
158
+ "decoder_with_past_model": {
159
+ "op_types": [
160
+ "Add",
161
+ "Concat",
162
+ "Div",
163
+ "Erf",
164
+ "Gather",
165
+ "MatMul",
166
+ "Mul",
167
+ "Pow",
168
+ "ReduceMean",
169
+ "Reshape",
170
+ "Shape",
171
+ "Slice",
172
+ "Softmax",
173
+ "Sqrt",
174
+ "Sub",
175
+ "Transpose",
176
+ "Unsqueeze"
177
+ ],
178
+ "weight_type": "QInt8"
179
+ },
180
+ "decoder_model_merged": {
181
+ "op_types": [
182
+ "Add",
183
+ "Concat",
184
+ "Constant",
185
+ "ConstantOfShape",
186
+ "Div",
187
+ "Equal",
188
+ "Erf",
189
+ "Expand",
190
+ "Gather",
191
+ "If",
192
+ "Less",
193
+ "MatMul",
194
+ "Mul",
195
+ "Pow",
196
+ "Range",
197
+ "ReduceMean",
198
+ "Reshape",
199
+ "Shape",
200
+ "Slice",
201
+ "Softmax",
202
+ "Sqrt",
203
+ "Squeeze",
204
+ "Sub",
205
+ "Transpose",
206
+ "Unsqueeze",
207
+ "Where"
208
+ ],
209
+ "weight_type": "QInt8"
210
+ }
211
+ },
212
+ "per_channel": false,
213
+ "reduce_range": false
214
+ },
215
+ "uint8": {
216
+ "per_model_config": {
217
+ "encoder_model": {
218
+ "op_types": [
219
+ "Add",
220
+ "Conv",
221
+ "Div",
222
+ "Erf",
223
+ "MatMul",
224
+ "Mul",
225
+ "Pow",
226
+ "ReduceMean",
227
+ "Reshape",
228
+ "Softmax",
229
+ "Sqrt",
230
+ "Sub",
231
+ "Transpose"
232
+ ],
233
+ "weight_type": "QUInt8"
234
+ },
235
+ "decoder_model": {
236
+ "op_types": [
237
+ "Add",
238
+ "Concat",
239
+ "ConstantOfShape",
240
+ "Div",
241
+ "Equal",
242
+ "Erf",
243
+ "Expand",
244
+ "Gather",
245
+ "Less",
246
+ "MatMul",
247
+ "Mul",
248
+ "Pow",
249
+ "Range",
250
+ "ReduceMean",
251
+ "Reshape",
252
+ "Shape",
253
+ "Slice",
254
+ "Softmax",
255
+ "Sqrt",
256
+ "Squeeze",
257
+ "Sub",
258
+ "Transpose",
259
+ "Unsqueeze",
260
+ "Where"
261
+ ],
262
+ "weight_type": "QUInt8"
263
+ },
264
+ "decoder_with_past_model": {
265
+ "op_types": [
266
+ "Add",
267
+ "Concat",
268
+ "Div",
269
+ "Erf",
270
+ "Gather",
271
+ "MatMul",
272
+ "Mul",
273
+ "Pow",
274
+ "ReduceMean",
275
+ "Reshape",
276
+ "Shape",
277
+ "Slice",
278
+ "Softmax",
279
+ "Sqrt",
280
+ "Sub",
281
+ "Transpose",
282
+ "Unsqueeze"
283
+ ],
284
+ "weight_type": "QUInt8"
285
+ },
286
+ "decoder_model_merged": {
287
+ "op_types": [
288
+ "Add",
289
+ "Concat",
290
+ "Constant",
291
+ "ConstantOfShape",
292
+ "Div",
293
+ "Equal",
294
+ "Erf",
295
+ "Expand",
296
+ "Gather",
297
+ "If",
298
+ "Less",
299
+ "MatMul",
300
+ "Mul",
301
+ "Pow",
302
+ "Range",
303
+ "ReduceMean",
304
+ "Reshape",
305
+ "Shape",
306
+ "Slice",
307
+ "Softmax",
308
+ "Sqrt",
309
+ "Squeeze",
310
+ "Sub",
311
+ "Transpose",
312
+ "Unsqueeze",
313
+ "Where"
314
+ ],
315
+ "weight_type": "QUInt8"
316
+ }
317
+ },
318
+ "per_channel": false,
319
+ "reduce_range": false
320
+ },
321
+ "q4": {
322
+ "block_size": 32,
323
+ "is_symmetric": true,
324
+ "accuracy_level": null
325
+ },
326
+ "bnb4": {
327
+ "block_size": 64,
328
+ "quant_type": 1
329
+ }
330
+ }