import io
import os

import numpy as np
import onnxruntime as ort
import requests
import torch
from PIL import Image
from transformers import AutoModel, AutoProcessor


# --- Configuration ----------------------------------------------------------
# Hugging Face Hub repository that provides both the model and its processor.
MODEL_NAME = "amaye15/DaViT-Florence-2-large-ft"

# Hub downloads are cached in the directory the script is launched from.
CACHE_DIR = os.getcwd()

# Text prompt handed to the processor together with the image.
PROMPT = "<OCR>"

# Sample image fetched over HTTP; it supplies the example input for the export.
IMAGE_URL = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg?download=true"

# Destination file of the exported ONNX graph (relative to the working directory).
ONNX_MODEL_PATH = "model.onnx"


# --- Model and processor ----------------------------------------------------
# NOTE(security): trust_remote_code=True executes Python code shipped inside
# the model repository. Only use it with repositories you trust.
model = AutoModel.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    cache_dir=CACHE_DIR,
)
# Make inference mode explicit before the model is traced for export, so
# training-only layers (e.g. dropout) cannot end up active in the graph.
model.eval()

processor = AutoProcessor.from_pretrained(
    MODEL_NAME,
    trust_remote_code=True,
    cache_dir=CACHE_DIR,
)


# --- Example input ----------------------------------------------------------
# Download the sample image. The timeout keeps the script from hanging on a
# stalled connection, and raise_for_status() reports HTTP errors directly
# instead of letting PIL fail later on an error page.
response = requests.get(IMAGE_URL, timeout=30)
response.raise_for_status()

# Decode from the fully downloaded (and content-decoded) bytes rather than the
# raw socket stream; convert to RGB so the processor always receives a
# three-channel image.
image = Image.open(io.BytesIO(response.content)).convert("RGB")

# Run the processor on the prompt and image, requesting PyTorch tensors.
inputs = processor(text=PROMPT, images=image, return_tensors="pt")


# --- ONNX export ------------------------------------------------------------
# Names given to the graph's input and output in the exported file.
input_names = ["pixel_values"]
output_names = ["output"]

# Mark dimension 0 of every named input/output as a dynamic "batch_size" axis.
dynamic_axes = {name: {0: "batch_size"} for name in input_names + output_names}

# Gradients are not needed for export; disabling them avoids autograd
# bookkeeping while the model is traced.
# NOTE(review): opset 11 is fairly old - raise it if the exporter reports
# unsupported operators for this model.
with torch.no_grad():
    torch.onnx.export(
        model,
        (inputs["pixel_values"],),  # positional model arguments, as a tuple
        ONNX_MODEL_PATH,
        input_names=input_names,
        output_names=output_names,
        dynamic_axes=dynamic_axes,
        opset_version=11,
    )


# --- ONNX Runtime inference -------------------------------------------------
# Pass the execution provider explicitly: ONNX Runtime builds that ship more
# than one provider (e.g. the GPU package) can refuse to create a session when
# none is given. CPU is available in every build.
ort_session = ort.InferenceSession(
    ONNX_MODEL_PATH,
    providers=["CPUExecutionProvider"],
)

# ONNX Runtime consumes NumPy arrays; detach()/cpu() make the conversion safe
# regardless of the tensor's autograd state or device.
ort_inputs = {"pixel_values": inputs["pixel_values"].detach().cpu().numpy()}

# Passing None as the output list requests every output of the graph.
ort_outs = ort_session.run(None, ort_inputs)

print(ort_outs)