File size: 1,590 Bytes
4e8c1b1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import torch
from transformers import AutoTokenizer, AutoModel, AutoConfig
import os
# --- Settings: source checkpoint and the locations of the exported artifacts ---
model_name = "ibm-granite/granite-embedding-30m-english"
onnx_model_path = "./granite_embedding_model.onnx"
tokenizer_path = "./tokenizer"
config_path = "./config"

# --- Fetch the pretrained tokenizer, model weights, and configuration ---
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)

# Persist the tokenizer and config next to the ONNX file so they can be
# reloaded later without going back to the original checkpoint.
tokenizer.save_pretrained(tokenizer_path)
config.save_pretrained(config_path)

# Put the model in evaluation mode before it is traced for export.
model.eval()

# --- Build a sample batch that drives the export trace ---
dummy_input = tokenizer("This is a test sentence.", return_tensors="pt")
input_ids, attention_mask = (
    dummy_input["input_ids"],
    dummy_input["attention_mask"],
)

# Every exported tensor declares the same two variable dimensions, so the
# spec is written once and copied per tensor name (each name gets its own dict).
_variable_dims = {0: "batch_size", 1: "sequence_length"}
_dynamic_axes = {
    tensor_name: dict(_variable_dims)
    for tensor_name in ("input_ids", "attention_mask", "output")
}

# --- Export to ONNX ---
torch.onnx.export(
    model,
    (input_ids, attention_mask),  # positional inputs passed to the model
    onnx_model_path,  # destination file for the exported graph
    input_names=["input_ids", "attention_mask"],
    output_names=["output"],
    dynamic_axes=_dynamic_axes,
    opset_version=14,
)

# --- Report where each artifact was written ---
print(
    f"Model saved as ONNX to {onnx_model_path}",
    f"Tokenizer saved to {tokenizer_path}",
    f"Config saved to {config_path}",
    sep="\n",
)
|