"""Gradio demo that runs a user-selected Ultralytics YOLO model on an image.

The script downloads every model listed in the catalogue at import time,
builds a small Gradio UI (model dropdown, confidence/IoU sliders, input and
output images) and launches it.
"""

from typing import Optional, Tuple
import os

import gradio as gr
import numpy as np
import supervision as sv
from ultralytics import YOLO

MARKDOWN = """

YOLO-Application Toolkit 🚀

Welcome to the YOLO-Application Toolkit! This demo highlights the powerful detection capabilities of various YOLO models pre-trained on different datasets. 🎉 Easily detect different objects for various contexts in images on the go. Perfect for quick experimentation and practical use. 🎉🔍

**YOLO11**
Powered by [Ultralytics](https://github.com/ultralytics/ultralytics).🔥 """

# Alternative credit line, currently not shown in the UI:
# Roboflow [Inference](https://github.com/roboflow/inference), [Supervision](https://github.com/roboflow/supervision) and [Ultralytics](https://github.com/ultralytics/ultralytics).🔥

# Base URLs for the model weights and for the example images.
MODEL_WEIGHTS_URL = "https://huggingface.co/mbar0075/YOLO-Application-Toolkit/resolve/main/"
example_dir = "https://huggingface.co/spaces/mbar0075/YOLO-Application-Toolkit/resolve/main/examples/"

# Default slider values; also used for the pre-filled example rows.
DEFAULT_CONFIDENCE_THRESHOLD = 0.3
DEFAULT_IOU_THRESHOLD = 0.5

# Single source of truth for the available models:
#   display name -> (weights file name, example image file name).
# MODELS and EXAMPLE_DICT are both derived from this table so their keys can
# never drift apart. Insertion order is the order shown in the dropdown and
# in the examples gallery. Commented-out rows are disabled models.
_MODEL_CATALOG = {
    "YOLO11m (COCO128)": ("yolo11m.pt", "1.jpg"),
    "American Sign Language (ASL) (YOLO11m)": ("yolo11m_sign_language.pt", "2.jpg"),
    # "Microscopic Cell Detection (YOLO11m)": ("yolo11m_microscope_cells.pt", "3.jpg"),
    "Website Screenshots (YOLO11m)": ("yolo11m_website_screenshots.pt", "4.jpg"),
    "Zoo Animals (YOLO11m)": ("yolo11m_zoo_animals.pt", "5.jpg"),
    "Pinned Circuit Boards (YOLO11m)": ("yolo11m_circuit_boards.pt", "6.jpg"),
    "Smoke Detection (YOLO11m)": ("yolo11m_smoke_detection.pt", "7.jpg"),
    "Blood Cell Detection (YOLO11m)": ("yolo11m_blood_cells.pt", "8.jpg"),
    "Coins Detection (YOLO11m)": ("yolo11m_coins.pt", "9.jpg"),
    "Pizza Toppings Detection (YOLOv8s)": ("yolov8s_pizza.pt", "10.jpg"),
    "Aquarium Fish Detection (YOLO11m)": ("yolo11m_aquarium_fish.pt", "11.jpg"),
    # "Pelvis X-ray Detection (YOLO11m)": ("yolo11m_pelvis_xray.pt", "12.jpg"),
    "Road Signs Detection (YOLO11m)": ("yolo11m_road_signs.pt", "13.jpg"),
    # "Pizza Toppings Detection (YOLO11m)": ("yolo11s_pizza.pt", "10.jpg"),
    "Pelvis X-ray Segmentation (YOLO11s)": ("yolo11s_seg_pelvis_xray.pt", "12.jpg"),
    "Teeth X-ray Segmentation (YOLO11s)": ("yolo11s_seg_teeth_xray.pt", "14.jpg"),
    "Football Players Detection (YOLO11s)": ("yolo11s_football_players.pt", "15.jpg"),
    # "Litter Detection (YOLO11s)": ("yolo11s_litter_detection.pt", "16.jpg"),
    "Wheat Detection (YOLO11s)": ("yolo11s_wheat_detection.pt", "17.png"),
    "Signatures Detection (YOLO11s)": ("yolo11s_signatures_detection.pt", "18.jpg"),
    "Billiards Detection (YOLO11s)": ("yolo11s_billiards_detection.pt", "19.jpg"),
    "Car Parts Segmentation (YOLO11s)": ("yolo11s_seg_car_parts.pt", "20.jpg"),
    "YOLO11s-Pose (COCO128)": ("yolo11s-pose.pt", "21.jpg"),
    "Hand Keypoints Detection (YOLO11s)": ("yolo11s_hand_keypoints.pt", "22.jpg"),
}

# Every model is instantiated once, eagerly, when this module is imported;
# requests then only look the model up by its display name.
MODELS = {
    name: YOLO(MODEL_WEIGHTS_URL + weights_file)
    for name, (weights_file, _example_file) in _MODEL_CATALOG.items()
}

# Display name -> URL of the example image shown for that model.
EXAMPLE_DICT = {
    name: example_dir + example_file
    for name, (_weights_file, example_file) in _MODEL_CATALOG.items()
}

# Supervision annotators. The current rendering path uses `result.plot()`
# and does not call these; they are kept as module-level names so existing
# references to them keep working.
LABEL_ANNOTATORS = sv.LabelAnnotator()
BOUNDING_BOX_ANNOTATORS = sv.BoxAnnotator()


def detect_and_annotate(
    model,
    input_image: np.ndarray,
    confidence_threshold: float,
    iou_threshold: float,
    class_id_mapping: Optional[dict] = None,
) -> np.ndarray:
    """Run ``model`` on ``input_image`` and return the annotated image.

    Args:
        model: Callable model object; invoked as
            ``model(input_image, conf=..., iou=...)`` and expected to return
            a sequence whose first element has a ``plot()`` method.
        input_image: Image array handed to the model unchanged. Ultralytics
            interprets numpy input as HWC with BGR channel order.
        confidence_threshold: Forwarded to the model as ``conf``.
        iou_threshold: Forwarded to the model as ``iou``.
        class_id_mapping: Accepted for backward compatibility; not used by
            the ``plot()``-based rendering.

    Returns:
        The array produced by ``plot()`` on the first result (BGR channel
        order per the Ultralytics documentation).
    """
    first_result = model(input_image, conf=confidence_threshold, iou=iou_threshold)[0]
    return first_result.plot()


def _swap_channel_order(image: np.ndarray) -> np.ndarray:
    """Reverse the channel axis of a 3-channel HWC image (RGB <-> BGR).

    Arrays that are not three-dimensional with exactly three channels are
    returned unchanged.
    """
    if image.ndim == 3 and image.shape[2] == 3:
        # A contiguous copy avoids handing a negative-stride view to
        # downstream drawing code.
        return np.ascontiguousarray(image[:, :, ::-1])
    return image


def process_image(
    input_image,
    yolov11_confidence_threshold: float,
    iou_threshold: float,
    model_name: str,
) -> np.ndarray:
    """Gradio callback: annotate ``input_image`` with the selected model.

    Args:
        input_image: Image from the Gradio input component (a PIL image,
            since the component is created with ``type='pil'``), or ``None``
            when no image has been provided.
        yolov11_confidence_threshold: Confidence threshold from the slider.
        iou_threshold: IoU threshold from the slider.
        model_name: Key into ``MODELS``.

    Returns:
        The annotated image as an RGB array suitable for the output
        component.

    Raises:
        gr.Error: If no input image was provided.
        KeyError: If ``model_name`` is not a key of ``MODELS``.
    """
    if input_image is None:
        raise gr.Error("Please provide an input image before submitting.")

    model = MODELS[model_name]

    # PIL/Gradio images are RGB, while Ultralytics treats numpy input as BGR
    # and returns BGR from plot(); convert on the way in and on the way out.
    bgr_image = _swap_channel_order(np.array(input_image))
    annotated_bgr = detect_and_annotate(
        model, bgr_image, yolov11_confidence_threshold, iou_threshold
    )
    return _swap_channel_order(annotated_bgr)


# Gradio UI components
yolo_11s_confidence_threshold_component = gr.Slider(
    minimum=0,
    maximum=1.0,
    value=DEFAULT_CONFIDENCE_THRESHOLD,
    step=0.01,
    label="YOLO Confidence Threshold",
    info=(
        "The confidence threshold for the YOLO model. Lower the threshold to "
        "reduce false negatives, enhancing the model's sensitivity to detect "
        "sought-after objects. Conversely, increase the threshold to minimize false "
        "positives, preventing the model from identifying objects it shouldn't."
    ),
)

iou_threshold_component = gr.Slider(
    minimum=0,
    maximum=1.0,
    value=DEFAULT_IOU_THRESHOLD,
    step=0.01,
    label="IoU Threshold",
    info=(
        "The Intersection over Union (IoU) threshold for non-maximum suppression. "
        "Decrease the value to lessen the occurrence of overlapping bounding boxes, "
        "making the detection process stricter. On the other hand, increase the value "
        "to allow more overlapping bounding boxes, accommodating a broader range of "
        "detections."
    ),
)

model_dropdown = gr.Dropdown(
    choices=list(MODELS.keys()),
    label="Select Model",
    value="YOLO11m (COCO128)",
    info=(
        "Choose the YOLO model you want to use for object detection. Each model is "
        "trained on a specific dataset, making them suitable for various detection tasks."
    ),
)


def update_example(model_name: str) -> str:
    """Return the example image URL registered for ``model_name``."""
    return EXAMPLE_DICT[model_name]


# NOTE(review): the nesting below was reconstructed from a source whose
# indentation had been lost — confirm the layout matches the intended UI.
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)

    with gr.Accordion("Configuration", open=False):
        yolo_11s_confidence_threshold_component.render()
        iou_threshold_component.render()

    with gr.Row():
        model_dropdown.render()

    with gr.Row():
        image_input_component = gr.Image(type='pil', label='Input Image')
        yolo_11s_output_component = gr.Image(type='pil', label='YOLO Output')

    submit_button = gr.Button(value='Submit', scale=1, variant='primary')

    # Inputs shared by the examples gallery and the submit button.
    inference_inputs = [
        image_input_component,
        yolo_11s_confidence_threshold_component,
        iou_threshold_component,
        model_dropdown,
    ]

    gr.Examples(
        fn=process_image,
        examples=[
            [example_url, DEFAULT_CONFIDENCE_THRESHOLD, DEFAULT_IOU_THRESHOLD, name]
            for name, example_url in EXAMPLE_DICT.items()
        ],
        inputs=inference_inputs,
        outputs=[yolo_11s_output_component],
    )

    # Selecting a model loads that model's example image into the input.
    model_dropdown.change(
        fn=update_example,
        inputs=model_dropdown,
        outputs=image_input_component,
    )

    submit_button.click(
        fn=process_image,
        inputs=inference_inputs,
        outputs=[yolo_11s_output_component],
    )

demo.launch(debug=False, show_error=True, max_threads=1)