import gradio as gr
import numpy as np
import supervision as sv
from ultralytics import YOLO

MARKDOWN = """
Welcome to the YOLO-Application Toolkit! This demo showcases the detection capabilities of several YOLO models, each pre-trained on a different dataset. Easily detect objects across a variety of contexts in images on the go. Perfect for quick experimentation and practical use. 🎉🔍
**YOLO11** Powered by [Ultralytics](https://github.com/ultralytics/ultralytics). 🔥
"""
# Roboflow [Inference](https://github.com/roboflow/inference), [Supervision](https://github.com/roboflow/supervision) and [Ultralytics](https://github.com/ultralytics/ultralytics).🔥

# Load models dynamically
MODELS = {
    "YOLO11m (COCO128)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11m.pt"),
    "American Sign Language (ASL) (YOLO11m)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11m_sign_language.pt"),
    # "Microscopic Cell Detection (YOLO11m)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11m_microscope_cells.pt"),
    "Website Screenshots (YOLO11m)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11m_website_screenshots.pt"),
    "Zoo Animals (YOLO11m)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11m_zoo_animals.pt"),
    "Pinned Circuit Boards (YOLO11m)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11m_circuit_boards.pt"),
    "Smoke Detection (YOLO11m)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11m_smoke_detection.pt"),
    "Blood Cell Detection (YOLO11m)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11m_blood_cells.pt"),
    "Coins Detection (YOLO11m)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11m_coins.pt"),
    "Pizza Toppings Detection (YOLOv8s)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolov8s_pizza.pt"),
    "Aquarium Fish Detection (YOLO11m)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11m_aquarium_fish.pt"),
    # "Pelvis X-ray Detection (YOLO11m)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11m_pelvis_xray.pt"),
    "Road Signs Detection (YOLO11m)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11m_road_signs.pt"),
    # "Pizza Toppings Detection (YOLO11m)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11s_pizza.pt"),
    "Pelvis X-ray Segmentation (YOLO11s)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11s_seg_pelvis_xray.pt"),
    "Teeth X-ray Segmentation (YOLO11s)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11s_seg_teeth_xray.pt"),
    "Football Players Detection (YOLO11s)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11s_football_players.pt"),
    # "Litter Detection (YOLO11s)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11s_litter_detection.pt"),
    "Wheat Detection (YOLO11s)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11s_wheat_detection.pt"),
    "Signatures Detection (YOLO11s)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11s_signatures_detection.pt"),
    "Billiards Detection (YOLO11s)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11s_billiards_detection.pt"),
    "Car Parts Segmentation (YOLO11s)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11s_seg_car_parts.pt"),
    "YOLO11s-Pose (COCO128)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11s-pose.pt"),
    "Hand Keypoints Detection (YOLO11s)": YOLO("https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/yolo11s_hand_keypoints.pt"),
}

example_dir = "https://huggingface.co/spaces/mbar0075/YOLO-Application-Toolkit/resolve/main/examples/"

# Example image for each model
EXAMPLE_DICT = {
    "YOLO11m (COCO128)": example_dir + "1.jpg",
    "American Sign Language (ASL) (YOLO11m)": example_dir + "2.jpg",
    # "Microscopic Cell Detection (YOLO11m)": example_dir + "3.jpg",
    "Website Screenshots (YOLO11m)": example_dir + "4.jpg",
    "Zoo Animals (YOLO11m)": example_dir + "5.jpg",
    "Pinned Circuit Boards (YOLO11m)": example_dir + "6.jpg",
    "Smoke Detection (YOLO11m)": example_dir + "7.jpg",
    "Blood Cell Detection (YOLO11m)": example_dir + "8.jpg",
    "Coins Detection (YOLO11m)": example_dir + "9.jpg",
    "Pizza Toppings Detection (YOLOv8s)": example_dir + "10.jpg",
    "Aquarium Fish Detection (YOLO11m)": example_dir + "11.jpg",
    # "Pelvis X-ray Detection (YOLO11m)": example_dir + "12.jpg",
    "Road Signs Detection (YOLO11m)": example_dir + "13.jpg",
    # "Pizza Toppings Detection (YOLO11m)": example_dir + "10.jpg",
    "Pelvis X-ray Segmentation (YOLO11s)": example_dir + "12.jpg",
    "Teeth X-ray Segmentation (YOLO11s)": example_dir + "14.jpg",
    "Football Players Detection (YOLO11s)": example_dir + "15.jpg",
    # "Litter Detection (YOLO11s)": example_dir + "16.jpg",
    "Wheat Detection (YOLO11s)": example_dir + "17.png",
    "Signatures Detection (YOLO11s)": example_dir + "18.jpg",
    "Billiards Detection (YOLO11s)": example_dir + "19.jpg",
    "Car Parts Segmentation (YOLO11s)": example_dir + "20.jpg",
    "YOLO11s-Pose (COCO128)": example_dir + "21.jpg",
    "Hand Keypoints Detection (YOLO11s)": example_dir + "22.jpg",
}

LABEL_ANNOTATORS = sv.LabelAnnotator()
BOUNDING_BOX_ANNOTATORS = sv.BoxAnnotator()

def detect_and_annotate(
    model,
    input_image: np.ndarray,
    confidence_threshold: float,
    iou_threshold: float,
    class_id_mapping: dict = None,
) -> np.ndarray:
    # Run inference with the user-selected thresholds
    result = model(input_image, conf=confidence_threshold, iou=iou_threshold)[0]
    # Ultralytics' built-in plotting handles boxes, masks and keypoints alike,
    # so it covers the detection, segmentation and pose models above.
    return result.plot()

    # Alternative: Supervision-based box/label annotation (kept for reference;
    # previously dead code after the return above, now commented out):
    # detections = sv.Detections.from_ultralytics(result)
    # if class_id_mapping:
    #     detections.class_id = np.array([class_id_mapping[class_id] for class_id in detections.class_id])
    # labels = [
    #     f"{class_name} ({confidence:.2f})"
    #     for class_name, confidence in zip(detections["class_name"], detections.confidence)
    # ]
    # annotated_image = input_image.copy()
    # annotated_image = BOUNDING_BOX_ANNOTATORS.annotate(scene=annotated_image, detections=detections)
    # annotated_image = LABEL_ANNOTATORS.annotate(scene=annotated_image, detections=detections, labels=labels)
    # return annotated_image

def process_image(
    input_image,
    confidence_threshold: float,
    iou_threshold: float,
    model_name: str,
) -> np.ndarray:
    # Look up the selected model among the preloaded ones
    model = MODELS[model_name]
    # Run inference and annotate the result
    return detect_and_annotate(model, np.array(input_image), confidence_threshold, iou_threshold)

# Gradio UI components
confidence_threshold_component = gr.Slider(
    minimum=0,
    maximum=1.0,
    value=0.3,
    step=0.01,
    label="YOLO Confidence Threshold",
    info=(
        "The confidence threshold for the YOLO model. Lower the threshold to "
        "reduce false negatives, making the model more sensitive to the objects "
        "you are looking for. Raise it to minimize false positives and keep the "
        "model from reporting objects it shouldn't."
    ),
)
iou_threshold_component = gr.Slider(
    minimum=0,
    maximum=1.0,
    value=0.5,
    step=0.01,
    label="IoU Threshold",
    info=(
        "The Intersection over Union (IoU) threshold for non-maximum suppression. "
        "Decrease the value to suppress more overlapping bounding boxes, making "
        "the detection process stricter. Increase it to allow more overlapping "
        "boxes and accommodate a broader range of detections."
    ),
)

model_dropdown = gr.Dropdown(
    choices=list(MODELS.keys()),
    label="Select Model",
    value="YOLO11m (COCO128)",
    info=(
        "Choose the YOLO model to use for detection. Each model is trained on a "
        "specific dataset, making it suitable for a different detection task."
    ),
)

def update_example(model_name):
    # Swap the input image to the example that matches the selected model
    return EXAMPLE_DICT[model_name]

with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Accordion("Configuration", open=False):
        confidence_threshold_component.render()
        iou_threshold_component.render()
    with gr.Row():
        model_dropdown.render()
    with gr.Row():
        image_input_component = gr.Image(type="pil", label="Input Image")
        output_image_component = gr.Image(type="pil", label="YOLO Output")
    submit_button = gr.Button(value="Submit", scale=1, variant="primary")
    gr.Examples(
        fn=process_image,
        examples=[[EXAMPLE_DICT[name], 0.3, 0.5, name] for name in EXAMPLE_DICT],
        inputs=[image_input_component, confidence_threshold_component, iou_threshold_component, model_dropdown],
        outputs=[output_image_component],
    )

    model_dropdown.change(fn=update_example, inputs=model_dropdown, outputs=image_input_component)
    submit_button.click(
        fn=process_image,
        inputs=[image_input_component, confidence_threshold_component, iou_threshold_component, model_dropdown],
        outputs=[output_image_component],
    )

demo.launch(debug=False, show_error=True, max_threads=1)
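# Standalone usage sketch, without launching the UI. The file name
# "example.jpg" is an illustrative placeholder, and 0.3/0.5 are simply the
# app's default thresholds:
#
#   from PIL import Image
#   image = Image.open("example.jpg")
#   annotated = process_image(image, 0.3, 0.5, "YOLO11m (COCO128)")
#   # `annotated` is a NumPy array in BGR order, as returned by
#   # Ultralytics' result.plot().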