File size: 1,283 Bytes
4e8c1b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# Disabled helper snippet: print the input and output shapes of an ONNX model.
# import onnx
#
# # Load the ONNX model
# model_path = "model_uint8.onnx"  # Replace with the path to your ONNX model
# onnx_model = onnx.load(model_path)
#
# # Print the model's input and output shapes
# for input_tensor in onnx_model.graph.input:
#     print(f"Input Name: {input_tensor.name}")
#     print(
#         f"Input Shape: {[dim.dim_value for dim in input_tensor.type.tensor_type.shape.dim]}"
#     )
# for output_tensor in onnx_model.graph.output:
#     print(f"Output Name: {output_tensor.name}")
#     print(
#         f"Output Shape: {[dim.dim_value for dim in output_tensor.type.tensor_type.shape.dim]}"
#     )
from onnxruntime.quantization import quantize_dynamic, QuantType
# Dynamic-quantization script: takes the original ONNX model on disk and
# writes out a copy quantized with UInt8 weights.

# Destination file for the quantized model.
quantized_model_path = "./model_uint8.onnx"
# Source file holding the original (unquantized) ONNX model.
onnx_model_path = "./granite_embedding_model.onnx"

# Run ONNX Runtime's dynamic quantizer; the first two positional arguments
# are the input model path and the output model path, respectively.
quantize_dynamic(onnx_model_path, quantized_model_path, weight_type=QuantType.QUInt8)

# Tell the user where the quantized model ended up.
print(f"Quantized model saved to {quantized_model_path}")
|