# YOLO-World ONNX inference demo.
# (Removed page-scrape artifacts -- "Spaces:" / "Runtime error" lines --
# that were not part of the original script.)
import argparse
import json
import os
import os.path as osp

import cv2
import numpy as np
import onnxruntime as ort
import supervision as sv
from mmengine.utils import ProgressBar

# torch/torchvision are only required for the `--without-nms` export path
# (host-side NMS); keep the demo importable without them.
try:
    import torch
    from torchvision.ops import nms
except Exception as e:
    print(e)
# Shared supervision annotators used by `visualize` to draw results.
BOUNDING_BOX_ANNOTATOR = sv.BoundingBoxAnnotator(thickness=1)
MASK_ANNOTATOR = sv.MaskAnnotator()
class LabelAnnotator(sv.LabelAnnotator):
    """Label annotator whose background box grows right/down from the anchor."""

    # NOTE: must be a staticmethod -- the original `def` had neither `self`
    # nor `@staticmethod`, so an instance call would pass the instance as
    # `center_coordinates` and raise TypeError for the extra argument.
    @staticmethod
    def resolve_text_background_xyxy(
        center_coordinates,
        text_wh,
        position,
    ):
        """Return (x1, y1, x2, y2) of the label background.

        `position` is accepted for API compatibility but ignored; the box
        always extends right and down from the anchor point.
        """
        center_x, center_y = center_coordinates
        text_w, text_h = text_wh
        return center_x, center_y, center_x + text_w, center_y + text_h
# Shared label drawer used by `visualize`.
LABEL_ANNOTATOR = LabelAnnotator(text_padding=4,
                                 text_scale=0.5,
                                 text_thickness=1)
def parse_args():
    """Parse command-line arguments for the YOLO-World ONNX demo.

    Returns:
        argparse.Namespace: parsed arguments (onnx, image, text,
        output_dir, device, onnx_nms).
    """
    parser = argparse.ArgumentParser('YOLO-World ONNX Demo')
    parser.add_argument('onnx', help='onnx file')
    parser.add_argument('image', help='image path, include image file or dir.')
    parser.add_argument(
        'text',
        help=
        'detecting texts (str or json), should be consistent with the ONNX model'
    )
    parser.add_argument('--output-dir',
                        default='./output',
                        help='directory to save output files')
    parser.add_argument('--device',
                        default='cuda:0',
                        help='device used for inference')
    # NOTE(review): store_false means onnx_nms DEFAULTS to True (model has
    # embedded NMS) and passing --onnx-nms disables it. The flag name reads
    # inverted, but the behavior is kept for CLI compatibility.
    parser.add_argument(
        '--onnx-nms',
        action='store_false',
        help='whether ONNX model contains NMS and postprocessing')
    args = parser.parse_args()
    return args
def preprocess(image, size=(640, 640)):
    """Letterbox `image` onto a square canvas, resize to `size`, scale to [0,1].

    Args:
        image (np.ndarray): HxWx3 input image.
        size (tuple): target (width, height); assumed square -- only
            size[0] is used to compute the scale factor.

    Returns:
        tuple: (image, scale_factor, (pad_h, pad_w)) where image has shape
        (1, size[1], size[0], 3) float32 in [0, 1], scale_factor maps
        padded-image coordinates to network coordinates, and pad_h/pad_w
        are the top/left padding applied BEFORE the resize.
    """
    h, w = image.shape[:2]
    max_size = max(h, w)
    scale_factor = size[0] / max_size
    # Center the image on a square canvas to preserve the aspect ratio.
    pad_h = (max_size - h) // 2
    pad_w = (max_size - w) // 2
    pad_image = np.zeros((max_size, max_size, 3), dtype=image.dtype)
    pad_image[pad_h:h + pad_h, pad_w:w + pad_w] = image
    image = cv2.resize(pad_image, size,
                       interpolation=cv2.INTER_LINEAR).astype('float32')
    image /= 255.0
    # Add the batch dimension expected by the ONNX model.
    image = image[None]
    return image, scale_factor, (pad_h, pad_w)
def visualize(image, bboxes, labels, scores, texts):
    """Draw detection boxes and captions onto `image` and return it.

    Args:
        image (np.ndarray): BGR image to annotate.
        bboxes (np.ndarray): (N, 4) boxes in xyxy pixel coordinates.
        labels (np.ndarray): (N,) integer class ids indexing `texts`.
        scores (np.ndarray): (N,) confidence scores.
        texts (list): list of [class_name] entries, one per class id.

    Returns:
        np.ndarray: the annotated image.
    """
    detections = sv.Detections(xyxy=bboxes, class_id=labels, confidence=scores)
    # "<class name> <score>" caption per detection (renamed from `labels`,
    # which shadowed the parameter of the same name).
    captions = [
        f"{texts[class_id][0]} {confidence:0.2f}" for class_id, confidence in
        zip(detections.class_id, detections.confidence)
    ]
    image = BOUNDING_BOX_ANNOTATOR.annotate(image, detections)
    image = LABEL_ANNOTATOR.annotate(image, detections, labels=captions)
    return image
def inference(ort_session,
              image_path,
              texts,
              output_dir,
              size=(640, 640),
              **kwargs):
    """Run a YOLO-World ONNX model exported WITH embedded NMS/postprocessing.

    Reads `image_path`, letterboxes it to `size`, runs the session, maps the
    predicted boxes back to original-image coordinates, draws them and writes
    the result to `output_dir` under the same basename.

    Returns:
        np.ndarray: the annotated image.
    """
    ori_image = cv2.imread(image_path)
    h, w = ori_image.shape[:2]
    # BGR -> RGB before preprocessing.
    image, scale_factor, pad_param = preprocess(ori_image[:, :, [2, 1, 0]],
                                                size)
    input_ort = ort.OrtValue.ortvalue_from_numpy(image.transpose((0, 3, 1, 2)))
    results = ort_session.run(["num_dets", "labels", "scores", "boxes"],
                              {"images": input_ort})
    num_dets, labels, scores, bboxes = results
    # Keep only the first num_dets valid predictions of the single batch item.
    num_dets = num_dets[0][0]
    labels = labels[0, :num_dets]
    scores = scores[0, :num_dets]
    bboxes = bboxes[0, :num_dets]
    # Map boxes back to original-image coordinates. `preprocess` pads FIRST
    # and resizes SECOND, so invert in the opposite order: undo the resize,
    # then remove the padding offsets. (The original subtracted the pads
    # before rescaling, which misplaced boxes for non-square inputs.)
    bboxes /= scale_factor
    bboxes -= np.array(
        [pad_param[1], pad_param[0], pad_param[1], pad_param[0]])
    bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, w)
    bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, h)
    bboxes = bboxes.round().astype('int')
    image_out = visualize(ori_image, bboxes, labels, scores, texts)
    cv2.imwrite(osp.join(output_dir, osp.basename(image_path)), image_out)
    return image_out
def inference_with_postprocessing(ort_session,
                                  image_path,
                                  texts,
                                  output_dir,
                                  size=(640, 640),
                                  nms_thr=0.7,
                                  score_thr=0.3,
                                  max_dets=300):
    """Run a YOLO-World ONNX model exported with `--without-nms`.

    The raw (scores, boxes) outputs are post-processed on the host:
    class-wise NMS, score thresholding and a top-`max_dets` cut. Boxes are
    then mapped back to original-image coordinates, drawn and saved to
    `output_dir` under the image basename.

    Returns:
        np.ndarray: the annotated image.
    """
    ori_image = cv2.imread(image_path)
    h, w = ori_image.shape[:2]
    # BGR -> RGB before preprocessing.
    image, scale_factor, pad_param = preprocess(ori_image[:, :, [2, 1, 0]],
                                                size)
    input_ort = ort.OrtValue.ortvalue_from_numpy(image.transpose((0, 3, 1, 2)))
    results = ort_session.run(["scores", "boxes"], {"images": input_ort})
    scores, bboxes = results
    # Move raw outputs to torch; fall back to CPU when CUDA is unavailable
    # (the device was previously hard-coded to 'cuda:0').
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    ori_scores = torch.from_numpy(scores[0]).to(device)
    ori_bboxes = torch.from_numpy(bboxes[0]).to(device)
    scores_list = []
    labels_list = []
    bboxes_list = []
    # Class-specific NMS: suppress overlaps only within the same class.
    for cls_id in range(len(texts)):
        cls_scores = ori_scores[:, cls_id]
        # Create labels on the same device so indexing with `keep_idxs`
        # (a CUDA tensor when on GPU) does not raise; the original built
        # them on the CPU unconditionally.
        labels = torch.full((cls_scores.shape[0], ),
                            cls_id,
                            dtype=torch.long,
                            device=device)
        keep_idxs = nms(ori_bboxes, cls_scores, iou_threshold=nms_thr)
        scores_list.append(cls_scores[keep_idxs])
        labels_list.append(labels[keep_idxs])
        bboxes_list.append(ori_bboxes[keep_idxs])
    scores = torch.cat(scores_list, dim=0)
    labels = torch.cat(labels_list, dim=0)
    bboxes = torch.cat(bboxes_list, dim=0)
    # Score thresholding.
    keep_idxs = scores > score_thr
    scores = scores[keep_idxs]
    labels = labels[keep_idxs]
    bboxes = bboxes[keep_idxs]
    # Keep at most `max_dets` highest-scoring detections. (The original
    # compared len(keep_idxs) -- the full boolean-mask length -- instead of
    # the number of surviving detections.)
    if scores.numel() > max_dets:
        _, sorted_idx = torch.sort(scores, descending=True)
        topk = sorted_idx[:max_dets]
        bboxes = bboxes[topk]
        scores = scores[topk]
        labels = labels[topk]
    scores = scores.cpu().numpy()
    bboxes = bboxes.cpu().numpy()
    labels = labels.cpu().numpy()
    # Map boxes back to original-image coordinates. `preprocess` pads FIRST
    # and resizes SECOND, so undo the resize before removing the padding
    # (the original inverted these in the wrong order).
    bboxes /= scale_factor
    bboxes -= np.array(
        [pad_param[1], pad_param[0], pad_param[1], pad_param[0]])
    bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, w)
    bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, h)
    bboxes = bboxes.round().astype('int')
    image_out = visualize(ori_image, bboxes, labels, scores, texts)
    cv2.imwrite(osp.join(output_dir, osp.basename(image_path)), image_out)
    return image_out
def main():
    """Entry point: build the ONNX session, load inputs, run inference."""
    args = parse_args()
    onnx_file = args.onnx
    # Init the ONNX session; onnxruntime falls back to CPU if CUDA is absent.
    # NOTE(review): `args.device` is not used to select providers -- confirm
    # whether it should map onto the provider list.
    ort_session = ort.InferenceSession(
        onnx_file, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    print("Init ONNX Runtime session")
    # Honor the documented --output-dir flag; the original ignored it and
    # always wrote to a hard-coded "onnx_outputs" directory.
    output_dir = args.output_dir
    os.makedirs(output_dir, exist_ok=True)
    # Collect images: a single file, or every .png/.jpg in a directory.
    if not osp.isfile(args.image):
        images = [
            osp.join(args.image, img) for img in os.listdir(args.image)
            if img.endswith('.png') or img.endswith('.jpg')
        ]
    else:
        images = [args.image]
    # Texts: one class per line (.txt), a JSON list of lists (.json), or a
    # comma-separated string.
    if args.text.endswith('.txt'):
        with open(args.text) as f:
            lines = f.readlines()
        texts = [[t.rstrip('\r\n')] for t in lines]
    elif args.text.endswith('.json'):
        # Use a context manager so the file handle is closed deterministically.
        with open(args.text) as f:
            texts = json.load(f)
    else:
        texts = [[t.strip()] for t in args.text.split(',')]
    print("Start to inference.")
    progress_bar = ProgressBar(len(images))
    # --onnx-nms is store_false: True (default) -> model embeds NMS.
    if args.onnx_nms:
        inference_func = inference
    else:
        inference_func = inference_with_postprocessing
    for img in images:
        inference_func(ort_session, img, texts, output_dir=output_dir)
        progress_bar.update()
    print("Finish inference")


if __name__ == "__main__":
    main()