from transformers import DetrImageProcessor, DetrForObjectDetection
from PIL import Image, ImageDraw
import torch
import gradio as gr

# load the pre-trained image processor
image_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
# load the pre-trained object detection model
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

def detect_objects(image):
    # convert the image from a NumPy array to PIL format
    image = Image.fromarray(image)

    # preprocess the image and run it through the model (no gradients needed for inference)
    inputs = image_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # target size in (height, width) format; PIL's image.size is (width, height)
    target_sizes = torch.tensor([image.size[::-1]])

    # keep only detections with a confidence score of at least 0.9
    results = image_processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.9
    )[0]

    draw = ImageDraw.Draw(image)
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        box = [round(i, 2) for i in box.tolist()]
        # draw a bounding box around the detected object
        draw.rectangle(box, outline="yellow", width=2)
        # display the object label just above the box
        draw.text((box[0], box[1] - 10), model.config.id2label[label.item()], fill="white")

    return image

demo = gr.Interface(
    detect_objects,
    inputs=gr.Image(width=300, height=300),   # size of the image passed in
    outputs=gr.Image(width=300, height=300),  # size of the image returned
)

demo.launch()
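
# Optional sanity check: a minimal sketch of calling detect_objects directly,
# bypassing the Gradio UI. "sample.jpg" is a hypothetical local image file.
#
# from PIL import Image
# import numpy as np
#
# test_image = np.array(Image.open("sample.jpg").convert("RGB"))
# annotated = detect_objects(test_image)
# annotated.save("sample_annotated.jpg")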