import torch
from transformers import AutoTokenizer, AutoModel, AutoConfig

# Define the model name and output paths
model_name = "ibm-granite/granite-embedding-30m-english"
onnx_model_path = "./granite_embedding_model.onnx"
tokenizer_path = "./tokenizer"
config_path = "./config"

# Load the model, tokenizer, and config
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)

# Save the tokenizer and config for later use
tokenizer.save_pretrained(tokenizer_path)
config.save_pretrained(config_path)

# Set the model to evaluation mode
model.eval()

# Example input for tracing
dummy_input = tokenizer("This is a test sentence.", return_tensors="pt")
input_ids = dummy_input["input_ids"]
attention_mask = dummy_input["attention_mask"]

# Export the model to ONNX. Note: the exported "output" is the model's
# last_hidden_state; pooling (e.g. CLS-token or mean pooling) still has to be
# applied downstream to obtain a single sentence embedding.
torch.onnx.export(
    model,
    (input_ids, attention_mask),  # The model's inputs
    onnx_model_path,  # Path to save the ONNX model
    input_names=["input_ids", "attention_mask"],  # Input names
    output_names=["output"],  # First model output (last_hidden_state)
    dynamic_axes={
        # Batch size and sequence length can vary at inference time
        "input_ids": {0: "batch_size", 1: "sequence_length"},
        "attention_mask": {0: "batch_size", 1: "sequence_length"},
        "output": {0: "batch_size", 1: "sequence_length"},
    },
    opset_version=14,  # ONNX opset version
)

print(f"Model saved as ONNX to {onnx_model_path}")
print(f"Tokenizer saved to {tokenizer_path}")
print(f"Config saved to {config_path}")