Upload 10 files

Browse files

Files changed (10) hide show

onnx/config/config.json +27 -0
onnx/granite_embedding_model.onnx +3 -0
onnx/model_uint8.onnx +3 -0
onnx/onnx_conv.py +48 -0
onnx/tokenizer/merges.txt +0 -0
onnx/tokenizer/special_tokens_map.json +51 -0
onnx/tokenizer/tokenizer.json +0 -0
onnx/tokenizer/tokenizer_config.json +58 -0
onnx/tokenizer/vocab.json +0 -0
onnx/tools.py +35 -0

onnx/config/config.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "_name_or_path": "ibm-granite/granite-embedding-30m-english",
+  "architectures": [
+    "RobertaModel"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 384,
+  "initializer_range": 0.02,
+  "intermediate_size": 1536,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.46.3",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 50265
+}

onnx/granite_embedding_model.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a723af95b77d48c4bec7f8d71c8091ca9e91dfdab6fef8e16d7a5780a0de7b50
+size 121327615

onnx/model_uint8.onnx ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c000bf5e8142c5dd9c14ae1e41c071821d68b90a8ca9e44e633221feb8f87398
+size 30640016

onnx/onnx_conv.py ADDED Viewed

	@@ -0,0 +1,48 @@

+import torch
+from transformers import AutoTokenizer, AutoModel, AutoConfig
+import os
+# Define the model name and output paths
+model_name = "ibm-granite/granite-embedding-30m-english"
+onnx_model_path = "./granite_embedding_model.onnx"
+tokenizer_path = "./tokenizer"
+config_path = "./config"
+# Load the model, tokenizer, and config
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModel.from_pretrained(model_name)
+config = AutoConfig.from_pretrained(model_name)
+# Save the tokenizer and config for later use
+tokenizer.save_pretrained(tokenizer_path)
+config.save_pretrained(config_path)
+# Set the model to evaluation mode
+model.eval()
+# Example input for tracing
+dummy_input = tokenizer("This is a test sentence.", return_tensors="pt")
+input_ids = dummy_input["input_ids"]
+attention_mask = dummy_input["attention_mask"]
+# Export the model to ONNX
+torch.onnx.export(
+    model,
+    (input_ids, attention_mask),  # The model's inputs
+    onnx_model_path,  # Path to save the ONNX model
+    input_names=["input_ids", "attention_mask"],  # Input names
+    output_names=["output"],  # Output names
+    dynamic_axes={
+        "input_ids": {
+            0: "batch_size",
+            1: "sequence_length",
+        },  # Batch size and sequence length can vary
+        "attention_mask": {0: "batch_size", 1: "sequence_length"},
+        "output": {0: "batch_size", 1: "sequence_length"},
+    },
+    opset_version=14,  # ONNX opset version
+)
+print(f"Model saved as ONNX to {onnx_model_path}")
+print(f"Tokenizer saved to {tokenizer_path}")
+print(f"Config saved to {config_path}")

onnx/tokenizer/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

onnx/tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

onnx/tokenizer/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

onnx/tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
+  "model_max_length": 512,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "RobertaTokenizer",
+  "trim_offsets": true,
+  "unk_token": "<unk>"
+}

onnx/tokenizer/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

onnx/tools.py ADDED Viewed

	@@ -0,0 +1,35 @@

+# import onnx
+# # Load the ONNX model
+# model_path = "model_uint8.onnx"  # Replace with the path to your ONNX model
+# onnx_model = onnx.load(model_path)
+# # Print model's input and output shapes
+# for input_tensor in onnx_model.graph.input:
+#     print(f"Input Name: {input_tensor.name}")
+#     print(
+#         f"Input Shape: {[dim.dim_value for dim in input_tensor.type.tensor_type.shape.dim]}"
+#     )
+# for output_tensor in onnx_model.graph.output:
+#     print(f"Output Name: {output_tensor.name}")
+#     print(
+#         f"Output Shape: {[dim.dim_value for dim in output_tensor.type.tensor_type.shape.dim]}"
+#     )
+from onnxruntime.quantization import quantize_dynamic, QuantType
+# Define the path to the original ONNX model and the quantized output model
+onnx_model_path = "./granite_embedding_model.onnx"  # Path to the original ONNX model
+quantized_model_path = "./model_uint8.onnx"  # Path to save the quantized ONNX model
+# Perform dynamic quantization to UInt8
+quantize_dynamic(
+    model_input=onnx_model_path,  # Input ONNX model path
+    model_output=quantized_model_path,  # Output quantized model path
+    weight_type=QuantType.QUInt8,  # Use UInt8 for weights
+)
+# Print confirmation of quantization
+print(f"Quantized model saved to {quantized_model_path}")