from transformers import DetrImageProcessor, DetrForObjectDetection
from PIL import Image, ImageDraw
import torch
import gradio as gr

# load the pre-trained image processor
image_processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
# load the pre-trained object detection model
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

def detect_objects(image):
    # convert the image from a NumPy array to PIL format
    image = Image.fromarray(image)

    # preprocess the image and run it through the model (no gradients needed for inference)
    inputs = image_processor(images=image, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # target size in (height, width) format; PIL's image.size is (width, height)
    target_sizes = torch.tensor([image.size[::-1]])

    # keep only detections with a confidence score of at least 0.9
    results = image_processor.post_process_object_detection(
        outputs, target_sizes=target_sizes, threshold=0.9
    )[0]

    draw = ImageDraw.Draw(image)
    for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
        box = [round(i, 2) for i in box.tolist()]
        # draw a bounding box around the detected object
        draw.rectangle(box, outline="yellow", width=2)
        # display the object label just above the box
        draw.text((box[0], box[1] - 10), model.config.id2label[label.item()], fill="white")

    return image

demo = gr.Interface(
    detect_objects,
    inputs=gr.Image(width=300, height=300),   # size of the image passed in
    outputs=gr.Image(width=300, height=300),  # size of the image returned
)

demo.launch()
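
# Optional sanity check: a minimal sketch of calling detect_objects directly,
# bypassing the Gradio UI. "sample.jpg" is a hypothetical local image file.
#
# from PIL import Image
# import numpy as np
#
# test_image = np.array(Image.open("sample.jpg").convert("RGB"))
# annotated = detect_objects(test_image)
# annotated.save("sample_annotated.jpg")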