medxcribe committed on Dec 30, 2024

Commit

547d3e3

1 Parent(s): 8f97bb2

Add ONNX models to /onnx folder

Browse files

Files changed (31) hide show

config.json +2 -3
generation_config.json +2 -1
onnx/decoder_model.onnx +3 -0
onnx/decoder_model_bnb4.onnx +3 -0
onnx/decoder_model_fp16.onnx +3 -0
onnx/decoder_model_int8.onnx +3 -0
onnx/decoder_model_merged.onnx +3 -0
onnx/decoder_model_merged_bnb4.onnx +3 -0
onnx/decoder_model_merged_fp16.onnx +3 -0
onnx/decoder_model_merged_int8.onnx +3 -0
onnx/decoder_model_merged_q4.onnx +3 -0
onnx/decoder_model_merged_quantized.onnx +3 -0
onnx/decoder_model_merged_uint8.onnx +3 -0
onnx/decoder_model_q4.onnx +3 -0
onnx/decoder_model_quantized.onnx +3 -0
onnx/decoder_model_uint8.onnx +3 -0
onnx/decoder_with_past_model.onnx +3 -0
onnx/decoder_with_past_model_bnb4.onnx +3 -0
onnx/decoder_with_past_model_fp16.onnx +3 -0
onnx/decoder_with_past_model_int8.onnx +3 -0
onnx/decoder_with_past_model_q4.onnx +3 -0
onnx/decoder_with_past_model_quantized.onnx +3 -0
onnx/decoder_with_past_model_uint8.onnx +3 -0
onnx/encoder_model.onnx +3 -0
onnx/encoder_model_bnb4.onnx +3 -0
onnx/encoder_model_fp16.onnx +3 -0
onnx/encoder_model_int8.onnx +3 -0
onnx/encoder_model_q4.onnx +3 -0
onnx/encoder_model_quantized.onnx +3 -0
onnx/encoder_model_uint8.onnx +3 -0
quantize_config.json +330 -0

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "./whisper-base-en",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "apply_spec_augment": false,
@@ -44,8 +44,7 @@
   "pad_token_id": 50256,
   "scale_embedding": false,
   "suppress_tokens": [],
-  "torch_dtype": "float32",
-  "transformers_version": "4.35.2",
   "use_cache": true,
   "use_weighted_layer_sum": false,
   "vocab_size": 51864

 {
+  "_name_or_path": "medxcribe/whisper-base.en",
   "activation_dropout": 0.0,
   "activation_function": "gelu",
   "apply_spec_augment": false,
   "pad_token_id": 50256,
   "scale_embedding": false,
   "suppress_tokens": [],
+  "transformers_version": "4.38.2",
   "use_cache": true,
   "use_weighted_layer_sum": false,
   "vocab_size": 51864

generation_config.json CHANGED Viewed

@@ -133,5 +133,6 @@
     50360,
     50361
   ],
-  "transformers_version": "4.35.2"
 }

     50360,
     50361
   ],
+  "transformers_version": "4.38.2",
+  "trust_remote_code": false
 }

onnx/decoder_model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d3c2f7bb5aed65cd9637410c939e3fa6e971e829253a6e643e589c4588555ce
+size 208280583

onnx/decoder_model_bnb4.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6eb7b18a5ae9811e7c9f31739880b3c8e99d9b8930307539d34f6aeef1c7744e
+size 121782490

onnx/decoder_model_fp16.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42eb919a59a7e7748d1dc860430472ba5dde3d55f4b7e594f49b68a41c307ccc
+size 104279011

onnx/decoder_model_int8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d05e1c9496b756e0495dbcd34b0abf011e538e477e4bb30b77010ec75a42c2a
+size 159410870

onnx/decoder_model_merged.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:97ac9796b79649560d4d037ec3e882570455a4ce2be33211087431a0f379f903
+size 208593457

onnx/decoder_model_merged_bnb4.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:662ce2e2fbd67bdc2b2db5eb26c57f128a8993e0c433139133db660b1d53f835
+size 122102394

onnx/decoder_model_merged_fp16.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:396e7df8e8541e4790d22bc4a7471eed2bc99662cf6df3402f49cef061c2eace
+size 104595806

onnx/decoder_model_merged_int8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17a0d08be5d62bcc4bf2a0acf9e007c4a26f103b668fd7afe64a82f7a8e0132c
+size 159784439

onnx/decoder_model_merged_q4.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a5c88eea3e215bbb265478d5e6c3e802c6788077935a88db96ff9e0bb1fd4e86
+size 123674348

onnx/decoder_model_merged_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17a0d08be5d62bcc4bf2a0acf9e007c4a26f103b668fd7afe64a82f7a8e0132c
+size 159784439

onnx/decoder_model_merged_uint8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7dac37e938a6cdf0b6588d05d50bcea1fd9a7fb756a789f0249af5ba10def8f1
+size 159784400

onnx/decoder_model_q4.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb599c6b6674827738eee81c0f2cfbdfa83faf5adabe2e5cc02efd3142e13e54
+size 123354874

onnx/decoder_model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0d05e1c9496b756e0495dbcd34b0abf011e538e477e4bb30b77010ec75a42c2a
+size 159410870

onnx/decoder_model_uint8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:92fe2ed1cbafa6cbd3f9d51cb6b421cc43ade6b1b438ba9923b7152ea3e7bd0d
+size 159410903

onnx/decoder_with_past_model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d22b5f31c08cf20ad8e2ead52ca4eb5a1c79a7dd4857c0bdba91914b7d721bd2
+size 195675133

onnx/decoder_with_past_model_bnb4.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4e56e44475cb06ce05ffe5d476e8a8f37a0e4db603e945d07aeb7aabe9ffb661
+size 119988596

onnx/decoder_with_past_model_fp16.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1d70cd075b0d5e32932522dee9dde39a784bef298a46b56f528c17bc60a3a06a
+size 97973240

onnx/decoder_with_past_model_int8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:adab4e4c9167e43b995e552f70721e6db0c66b44f8a54adb0e8660ad5b5a823d
+size 156230721

onnx/decoder_with_past_model_q4.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b02579314508b58f0c04da8876b66e2e5145ca9117b364258ea48aee0de586f2
+size 121364468

onnx/decoder_with_past_model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:adab4e4c9167e43b995e552f70721e6db0c66b44f8a54adb0e8660ad5b5a823d
+size 156230721

onnx/decoder_with_past_model_uint8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:494567e281607fd66d5cd65b6b43585929d205a0a039dc796a6d5c89af70fc53
+size 156230750

onnx/encoder_model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:640d3fc87c5ae556254b6ad920392d0ca11a99e2e978f69846cf7d5df14910b8
+size 82449726

onnx/encoder_model_bnb4.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7efe981387ad6809649bd3e2fdc7f0ecf037cdd22a24894c80d71ecc46b3dfe9
+size 17574739

onnx/encoder_model_fp16.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8eba79bbb2ead961abf6a7f86499d9c2c5c5f48abe0681bf5047eb5e573f5064
+size 41268727

onnx/encoder_model_int8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a84f3f55562139c34ad69a28bd4848d0cb15f3aadce98ec413a88db669963185
+size 23137412

onnx/encoder_model_q4.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bcd3dad2752b8b96aa23c8213fdceb09b6daf44ac451fbec6c5ce937c3900b7b
+size 18754099

onnx/encoder_model_quantized.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:46b75f5a5876ca67ebaf35cb8ed95b73410c8c476cd9c33a218df1c028afedcc
+size 23137435

onnx/encoder_model_uint8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:46b75f5a5876ca67ebaf35cb8ed95b73410c8c476cd9c33a218df1c028afedcc
+size 23137435

quantize_config.json ADDED Viewed

	@@ -0,0 +1,330 @@

+{
+    "fp16": {},
+    "q8": {
+        "per_model_config": {
+            "encoder_model": {
+                "op_types": [
+                    "Add",
+                    "Conv",
+                    "Div",
+                    "Erf",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "ReduceMean",
+                    "Reshape",
+                    "Softmax",
+                    "Sqrt",
+                    "Sub",
+                    "Transpose"
+                ],
+                "weight_type": "QUInt8"
+            },
+            "decoder_model": {
+                "op_types": [
+                    "Add",
+                    "Concat",
+                    "ConstantOfShape",
+                    "Div",
+                    "Equal",
+                    "Erf",
+                    "Expand",
+                    "Gather",
+                    "Less",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "Range",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Squeeze",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze",
+                    "Where"
+                ],
+                "weight_type": "QInt8"
+            },
+            "decoder_with_past_model": {
+                "op_types": [
+                    "Add",
+                    "Concat",
+                    "Div",
+                    "Erf",
+                    "Gather",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze"
+                ],
+                "weight_type": "QInt8"
+            },
+            "decoder_model_merged": {
+                "op_types": [
+                    "Add",
+                    "Concat",
+                    "Constant",
+                    "ConstantOfShape",
+                    "Div",
+                    "Equal",
+                    "Erf",
+                    "Expand",
+                    "Gather",
+                    "If",
+                    "Less",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "Range",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Squeeze",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze",
+                    "Where"
+                ],
+                "weight_type": "QInt8"
+            }
+        },
+        "per_channel": false,
+        "reduce_range": false
+    },
+    "int8": {
+        "per_model_config": {
+            "encoder_model": {
+                "op_types": [
+                    "Add",
+                    "Conv",
+                    "Div",
+                    "Erf",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "ReduceMean",
+                    "Reshape",
+                    "Softmax",
+                    "Sqrt",
+                    "Sub",
+                    "Transpose"
+                ],
+                "weight_type": "QInt8"
+            },
+            "decoder_model": {
+                "op_types": [
+                    "Add",
+                    "Concat",
+                    "ConstantOfShape",
+                    "Div",
+                    "Equal",
+                    "Erf",
+                    "Expand",
+                    "Gather",
+                    "Less",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "Range",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Squeeze",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze",
+                    "Where"
+                ],
+                "weight_type": "QInt8"
+            },
+            "decoder_with_past_model": {
+                "op_types": [
+                    "Add",
+                    "Concat",
+                    "Div",
+                    "Erf",
+                    "Gather",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze"
+                ],
+                "weight_type": "QInt8"
+            },
+            "decoder_model_merged": {
+                "op_types": [
+                    "Add",
+                    "Concat",
+                    "Constant",
+                    "ConstantOfShape",
+                    "Div",
+                    "Equal",
+                    "Erf",
+                    "Expand",
+                    "Gather",
+                    "If",
+                    "Less",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "Range",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Squeeze",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze",
+                    "Where"
+                ],
+                "weight_type": "QInt8"
+            }
+        },
+        "per_channel": false,
+        "reduce_range": false
+    },
+    "uint8": {
+        "per_model_config": {
+            "encoder_model": {
+                "op_types": [
+                    "Add",
+                    "Conv",
+                    "Div",
+                    "Erf",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "ReduceMean",
+                    "Reshape",
+                    "Softmax",
+                    "Sqrt",
+                    "Sub",
+                    "Transpose"
+                ],
+                "weight_type": "QUInt8"
+            },
+            "decoder_model": {
+                "op_types": [
+                    "Add",
+                    "Concat",
+                    "ConstantOfShape",
+                    "Div",
+                    "Equal",
+                    "Erf",
+                    "Expand",
+                    "Gather",
+                    "Less",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "Range",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Squeeze",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze",
+                    "Where"
+                ],
+                "weight_type": "QUInt8"
+            },
+            "decoder_with_past_model": {
+                "op_types": [
+                    "Add",
+                    "Concat",
+                    "Div",
+                    "Erf",
+                    "Gather",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze"
+                ],
+                "weight_type": "QUInt8"
+            },
+            "decoder_model_merged": {
+                "op_types": [
+                    "Add",
+                    "Concat",
+                    "Constant",
+                    "ConstantOfShape",
+                    "Div",
+                    "Equal",
+                    "Erf",
+                    "Expand",
+                    "Gather",
+                    "If",
+                    "Less",
+                    "MatMul",
+                    "Mul",
+                    "Pow",
+                    "Range",
+                    "ReduceMean",
+                    "Reshape",
+                    "Shape",
+                    "Slice",
+                    "Softmax",
+                    "Sqrt",
+                    "Squeeze",
+                    "Sub",
+                    "Transpose",
+                    "Unsqueeze",
+                    "Where"
+                ],
+                "weight_type": "QUInt8"
+            }
+        },
+        "per_channel": false,
+        "reduce_range": false
+    },
+    "q4": {
+        "block_size": 32,
+        "is_symmetric": true,
+        "accuracy_level": null
+    },
+    "bnb4": {
+        "block_size": 64,
+        "quant_type": 1
+    }
+}